libname iri 'J:\Dropbox\Projects\Walmart\IRI data';
libname perm 'J:\Dropbox\My Solo Projects\Inactive\Field Paper\Exp';

* Extract category revenue, volume sales, average price, weighted price (quarterly revenue)
* Keep UPC data with product char for food items;

/* Load the CPI series from CSV (header row skipped) and index each
   observation by month: monthid = month + 12*(year - 2001).
   Only cpi and monthid are kept. */
data work.cpi (keep = cpi monthid);
	infile 'J:\Dropbox\Projects\Walmart\IRI data\cpi.csv' delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2 ;
	informat observation_date yymmdd10. cpi best32.;
	format observation_date yymmdd10. cpi best12.;
	input observation_date cpi;
	/* running month index used later as the merge key with the scanner data */
	monthid = month(observation_date) + 12*(year(observation_date) - 2001);
run;

*beer;

/* Read the 11 yearly grocery scanner files of one category into
   work.<cat>1 - work.<cat>11 (one dataset per year, first record skipped).
   cat: category name as used in folder and file names. */
%macro read_groc_years(cat);
	%local yr sub weeks;
	/* Week-range suffix of each yearly file name, in order Year1 ... Year11. */
	%let weeks = 1114_1165 1166_1217 1218_1269 1270_1321 1322_1373 1374_1426 1427_1478 1479_1530 1531_1582 1583_1634 1635_1686;
	%do yr = 1 %to 11;
		/* Year8 has no intermediate External folder; all other years do. */
		%if &yr = 8 %then %let sub = ;
		%else %let sub = External\;
		DATA work.&cat&yr;
			infile "J:\IRI Data\Academic Dataset External\Year&yr\&sub.&cat\&cat._groc_%scan(&weeks, &yr, %str( ))" firstobs=2;
			input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
		run;
	%end;
%mend read_groc_years;
%read_groc_years(beer)

/* Stack the yearly files and build the composite numeric UPC key. */
DATA work.beer (drop = SY GE VEND ITEM);
	set work.beer1-work.beer11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
run;
/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete beer1-beer11;
run;
quit;

/* Attach the calendar variables from perm.dates; keep weeks found in both. */
proc sort data = work.beer;
	by week;
run;
data work.beer_dates (drop = week_start week_end month qtr);
	merge work.beer (in=has_sale) perm.dates (in=has_date);
	by week;
	if has_sale and has_date;
run;
*****PROD CHAR*********;
/* Import one product-attribute workbook, drop rows with a blank L2, build the
   COLUPC key from SY/GE/VEND/ITEM and sort the result by that key.
   out      : WORK dataset to create
   xlsfile  : full path of the workbook
   sheet    : sheet name passed to PROC IMPORT
   keepvars : variables kept in the output dataset */
%macro load_prod(out, xlsfile, sheet, keepvars);
	PROC IMPORT OUT= WORK.&out
		DATAFILE= "&xlsfile"
		DBMS=EXCEL REPLACE; SHEET="&sheet"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES;
	run;
	data work.&out (keep = &keepvars);
		set work.&out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0) ; GE = input(GE,4.0) ; VEND = input(VEND,6.0) ;
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		/* same weights as the key built on the scanner data, so the two can be merged */
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
	proc sort data = work.&out;
		by colupc;
	run;
%mend load_prod;
%load_prod(prod6, J:\IRI Data\Academic Dataset External\parsed stub files\prod_beer.xls, Sheet1$, product colupc vol_eq)
%load_prod(prod7, J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_beer.xlsx, Sheet1$, product colupc vol_eq)
%load_prod(prod11, J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_beer.xlsx, Sheet1$, product colupc vol_eq)

/* Split the dated scanner rows by year so each part is matched against the
   product file loaded for that period (prod6, prod7, prod11). */
data work.panel6 work.panel7 work.panel11;
	set work.beer_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
/* Keep only scanner rows whose UPC is present in the matching product file. */
data work.panel_prod6;
	merge work.panel6 (in=has_sale) work.prod6 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod7;
	merge work.panel7 (in=has_sale) work.prod7 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod11;
	merge work.panel11 (in=has_sale) work.prod11 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
/* Recombine the three periods into one matched panel. */
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
/* Category-named copy of the matched panel. */
data work.beer_panel;
	set work.panel_prod;
run;
*Unit sales;
/* Store-week volume: units times volume equivalent, over product-matched rows. */
proc sql;
	create table work.beer_vol_sales as
		select iri_key, week, sum(vol_eq * units) as beer_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* Drop the per-period intermediates. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11 prod6 prod7 prod11 panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales over all scanner rows in work.beer. */
proc sql;
	create table work.beer_revenue as
		select iri_key, week, sum(dollars) as beer
		from work.beer
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Attach the monthly CPI to every matched scanner row. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* The BY merge needs both inputs in monthid order; do not rely on the CSV order. */
proc sort data = work.cpi;
	by monthid;
run;
data work.beer_panel_cpi;
	merge work.panel_prod (in=in_panel) work.cpi;
	by monthid;
	/* keep scanner rows only: CPI months with no sales would add rows with no UPC */
	if in_panel;
run;
/* Per-UPC mean of CPI-deflated dollars per (units * vol_eq). */
proc sql;
create table work.beer_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.beer_panel_cpi
group by colupc
order by colupc;
quit;
/* 25th, 50th and 75th percentiles of the UPC-level average price. */
proc univariate data=beer_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* Constant key n = 1 lets the one-row percentile table merge onto every row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.beer_dates;
	by colupc;
run;
/* Scanner rows for UPCs that have an entry in the price table. */
data work.beer_dates_price;
	merge work.beer_dates (in=in_sales) work.beer_upc_price (in=in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
/* Assign each row the price quartile q (1 = cheapest) of its UPC. */
data work.beer_dates_price;
	merge work.beer_dates_price work.pctl;
	by n;
	/* SAS orders a missing value below every number, so an unpriced UPC would
	   satisfy avg_price < p25 and be counted in quartile 1; exclude it. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store, quarter and price quartile. */
proc sql;
create table work.beer_count as
select iri_key, qtrid, q, count(distinct colupc) as beer_count
from work.beer_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* One row per store-quarter with columns beer_count_<q>. */
proc transpose data=work.beer_count out=work.beer_count prefix=beer_count_;
	by iri_key qtrid;
	id q;
	var beer_count;
run;
/* A quartile with no UPC in a store-quarter is a count of zero, not missing. */
data work.beer_count (drop = _NAME_ _k);
	set work.beer_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
run;

proc datasets library = work;
	delete beer beer_dates beer_dates_price pctl beer_panel_cpi;
run;
quit;

*blades;

/* Read the 11 yearly grocery scanner files of one category into
   work.<cat>1 - work.<cat>11 (one dataset per year, first record skipped).
   cat: category name as used in folder and file names. */
%macro read_groc_years(cat);
	%local yr sub weeks;
	/* Week-range suffix of each yearly file name, in order Year1 ... Year11. */
	%let weeks = 1114_1165 1166_1217 1218_1269 1270_1321 1322_1373 1374_1426 1427_1478 1479_1530 1531_1582 1583_1634 1635_1686;
	%do yr = 1 %to 11;
		/* Year8 has no intermediate External folder; all other years do. */
		%if &yr = 8 %then %let sub = ;
		%else %let sub = External\;
		DATA work.&cat&yr;
			infile "J:\IRI Data\Academic Dataset External\Year&yr\&sub.&cat\&cat._groc_%scan(&weeks, &yr, %str( ))" firstobs=2;
			input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
		run;
	%end;
%mend read_groc_years;
%read_groc_years(blades)

/* Stack the yearly files and build the composite numeric UPC key. */
DATA work.blades (drop = SY GE VEND ITEM);
	set work.blades1-work.blades11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
run;
/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete blades1-blades11;
run;
quit;

/* Attach the calendar variables from perm.dates; keep weeks found in both. */
proc sort data = work.blades;
	by week;
run;
data work.blades_dates (drop = week_start week_end month qtr);
	merge work.blades (in=has_sale) perm.dates (in=has_date);
	by week;
	if has_sale and has_date;
run;
*****PROD CHAR*********;
/* Import one product-attribute workbook, drop rows with a blank L2, build the
   COLUPC key from SY/GE/VEND/ITEM and sort the result by that key.
   out      : WORK dataset to create
   xlsfile  : full path of the workbook
   sheet    : sheet name passed to PROC IMPORT
   keepvars : variables kept in the output dataset */
%macro load_prod(out, xlsfile, sheet, keepvars);
	PROC IMPORT OUT= WORK.&out
		DATAFILE= "&xlsfile"
		DBMS=EXCEL REPLACE; SHEET="&sheet"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES;
	run;
	data work.&out (keep = &keepvars);
		set work.&out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0) ; GE = input(GE,4.0) ; VEND = input(VEND,6.0) ;
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		/* same weights as the key built on the scanner data, so the two can be merged */
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
	proc sort data = work.&out;
		by colupc;
	run;
%mend load_prod;
%load_prod(prod6, J:\IRI Data\Academic Dataset External\parsed stub files\prod_blades.xls, Sheet1$, product colupc vol_eq)
%load_prod(prod7, J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_blades.xlsx, Sheet1$, product colupc vol_eq)
/* NOTE(review): this workbook is read from Sheet2$ while the other two use Sheet1$ - confirm this is intended. */
%load_prod(prod11, J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_blades.xlsx, Sheet2$, product colupc vol_eq)

/* Split the dated scanner rows by year so each part is matched against the
   product file loaded for that period (prod6, prod7, prod11). */
data work.panel6 work.panel7 work.panel11;
	set work.blades_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
/* Keep only scanner rows whose UPC is present in the matching product file. */
data work.panel_prod6;
	merge work.panel6 (in=has_sale) work.prod6 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod7;
	merge work.panel7 (in=has_sale) work.prod7 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod11;
	merge work.panel11 (in=has_sale) work.prod11 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
/* Recombine the three periods into one matched panel. */
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
*Unit sales;
/* Store-week volume: units times volume equivalent, over product-matched rows. */
proc sql;
	create table work.blades_vol_sales as
		select iri_key, week, sum(vol_eq * units) as blades_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* Drop the per-period intermediates. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11 prod6 prod7 prod11 panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales over all scanner rows in work.blades. */
proc sql;
	create table work.blades_revenue as
		select iri_key, week, sum(dollars) as blades
		from work.blades
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Attach the monthly CPI to every matched scanner row. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* The BY merge needs both inputs in monthid order; do not rely on the CSV order. */
proc sort data = work.cpi;
	by monthid;
run;
data work.blades_panel_cpi;
	merge work.panel_prod (in=in_panel) work.cpi;
	by monthid;
	/* keep scanner rows only: CPI months with no sales would add rows with no UPC */
	if in_panel;
run;
/* Per-UPC mean of CPI-deflated dollars per (units * vol_eq). */
proc sql;
create table work.blades_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.blades_panel_cpi
group by colupc
order by colupc;
quit;
/* 25th, 50th and 75th percentiles of the UPC-level average price. */
proc univariate data=blades_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* Constant key n = 1 lets the one-row percentile table merge onto every row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.blades_dates;
	by colupc;
run;
/* Scanner rows for UPCs that have an entry in the price table. */
data work.blades_dates_price;
	merge work.blades_dates (in=in_sales) work.blades_upc_price (in=in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
/* Assign each row the price quartile q (1 = cheapest) of its UPC. */
data work.blades_dates_price;
	merge work.blades_dates_price work.pctl;
	by n;
	/* SAS orders a missing value below every number, so an unpriced UPC would
	   satisfy avg_price < p25 and be counted in quartile 1; exclude it. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store, quarter and price quartile. */
proc sql;
create table work.blades_count as
select iri_key, qtrid, q, count(distinct colupc) as blades_count
from work.blades_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* One row per store-quarter with columns blades_count_<q>. */
proc transpose data=work.blades_count out=work.blades_count prefix=blades_count_;
	by iri_key qtrid;
	id q;
	var blades_count;
run;
/* A quartile with no UPC in a store-quarter is a count of zero, not missing. */
data work.blades_count (drop = _NAME_ _k);
	set work.blades_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
run;

proc datasets library = work;
	delete blades blades_dates blades_dates_price pctl blades_panel_cpi;
run;
quit;

*carbbev;

/* Read the 11 yearly grocery scanner files of one category into
   work.<cat>1 - work.<cat>11 (one dataset per year, first record skipped).
   cat: category name as used in folder and file names. */
%macro read_groc_years(cat);
	%local yr sub weeks;
	/* Week-range suffix of each yearly file name, in order Year1 ... Year11. */
	%let weeks = 1114_1165 1166_1217 1218_1269 1270_1321 1322_1373 1374_1426 1427_1478 1479_1530 1531_1582 1583_1634 1635_1686;
	%do yr = 1 %to 11;
		/* Year8 has no intermediate External folder; all other years do. */
		%if &yr = 8 %then %let sub = ;
		%else %let sub = External\;
		DATA work.&cat&yr;
			infile "J:\IRI Data\Academic Dataset External\Year&yr\&sub.&cat\&cat._groc_%scan(&weeks, &yr, %str( ))" firstobs=2;
			input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
		run;
	%end;
%mend read_groc_years;
%read_groc_years(carbbev)

/* Stack the yearly files and build the composite numeric UPC key. */
DATA work.carbbev (drop = SY GE VEND ITEM);
	set work.carbbev1-work.carbbev11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;
/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete carbbev1-carbbev11;
run;
quit;

/* Attach the calendar variables from perm.dates; keep weeks found in both. */
proc sort data = work.carbbev;
	by week;
run;
data work.carbbev_dates (drop = week_start week_end month qtr);
	merge work.carbbev (in=in1) perm.dates (in=in2);
	by week;
	if in1 and in2;
/* explicit step boundary so the statements that follow are not compiled into this step */
run;
*****PROD CHAR*********;
/* Import one product-attribute workbook, drop rows with a blank L2, build the
   COLUPC key from SY/GE/VEND/ITEM and sort the result by that key.
   out      : WORK dataset to create
   xlsfile  : full path of the workbook
   sheet    : sheet name passed to PROC IMPORT
   keepvars : variables kept in the output dataset */
%macro load_prod(out, xlsfile, sheet, keepvars);
	PROC IMPORT OUT= WORK.&out
		DATAFILE= "&xlsfile"
		DBMS=EXCEL REPLACE; SHEET="&sheet"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES;
	run;
	data work.&out (keep = &keepvars);
		set work.&out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0) ; GE = input(GE,4.0) ; VEND = input(VEND,6.0) ;
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		/* same weights as the key built on the scanner data, so the two can be merged */
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
	proc sort data = work.&out;
		by colupc;
	run;
%mend load_prod;
%load_prod(prod6, J:\IRI Data\Academic Dataset External\parsed stub files\prod_carbbev.xls, Sheet1$, product colupc vol_eq parent vendor product_type package)
%load_prod(prod7, J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_carbbev.xlsx, Sheet1$, product colupc vol_eq parent vendor product_type package)
%load_prod(prod11, J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_carbbev.xlsx, Sheet1$, product colupc vol_eq parent vendor product_type package)

/* Split the dated scanner rows by year so each part is matched against the
   product file loaded for that period (prod6, prod7, prod11). */
data work.panel6 work.panel7 work.panel11;
	set work.carbbev_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
/* Keep only scanner rows whose UPC is present in the matching product file. */
data work.panel_prod6;
	merge work.panel6 (in=has_sale) work.prod6 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod7;
	merge work.panel7 (in=has_sale) work.prod7 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod11;
	merge work.panel11 (in=has_sale) work.prod11 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
/* Recombine the three periods into one matched panel. */
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
/* Category-named copy of the matched panel. */
data work.carbbev_panel;
	set work.panel_prod;
run;
*Unit sales;
/* Store-week volume: units times volume equivalent, over product-matched rows. */
proc sql;
	create table work.carbbev_vol_sales as
		select iri_key, week, sum(vol_eq * units) as carbbev_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* Drop the per-period intermediates. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11 prod6 prod7 prod11 panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales over all scanner rows in work.carbbev. */
proc sql;
	create table work.carbbev_revenue as
		select iri_key, week, sum(dollars) as carbbev
		from work.carbbev
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Attach the monthly CPI to every matched scanner row. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* The BY merge needs both inputs in monthid order; do not rely on the CSV order. */
proc sort data = work.cpi;
	by monthid;
run;
data work.carbbev_panel_cpi;
	merge work.panel_prod (in=in_panel) work.cpi;
	by monthid;
	/* keep scanner rows only: CPI months with no sales would add rows with no UPC */
	if in_panel;
run;
/* Per-UPC mean of CPI-deflated dollars per (units * vol_eq). */
proc sql;
create table work.carbbev_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.carbbev_panel_cpi
group by colupc
order by colupc;
quit;
/* 25th, 50th and 75th percentiles of the UPC-level average price. */
proc univariate data=carbbev_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* Constant key n = 1 lets the one-row percentile table merge onto every row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.carbbev_dates;
	by colupc;
run;
/* Scanner rows for UPCs that have an entry in the price table. */
data work.carbbev_dates_price;
	merge work.carbbev_dates (in=in_sales) work.carbbev_upc_price (in=in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
/* Assign each row the price quartile q (1 = cheapest) of its UPC. */
data work.carbbev_dates_price;
	merge work.carbbev_dates_price work.pctl;
	by n;
	/* SAS orders a missing value below every number, so an unpriced UPC would
	   satisfy avg_price < p25 and be counted in quartile 1; exclude it. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store, quarter and price quartile. */
proc sql;
create table work.carbbev_count as
select iri_key, qtrid, q, count(distinct colupc) as carbbev_count
from work.carbbev_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* One row per store-quarter with columns carbbev_count_<q>. */
proc transpose data=work.carbbev_count out=work.carbbev_count prefix=carbbev_count_;
	by iri_key qtrid;
	id q;
	var carbbev_count;
run;
/* A quartile with no UPC in a store-quarter is a count of zero, not missing. */
data work.carbbev_count (drop = _NAME_ _k);
	set work.carbbev_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
run;

proc datasets library = work;
	delete carbbev carbbev_dates carbbev_dates_price pctl carbbev_panel_cpi;
run;
quit;

*cigets;

/* Read the 11 yearly grocery scanner files of one category into
   work.<cat>1 - work.<cat>11 (one dataset per year, first record skipped).
   cat: category name as used in folder and file names. */
%macro read_groc_years(cat);
	%local yr sub weeks;
	/* Week-range suffix of each yearly file name, in order Year1 ... Year11. */
	%let weeks = 1114_1165 1166_1217 1218_1269 1270_1321 1322_1373 1374_1426 1427_1478 1479_1530 1531_1582 1583_1634 1635_1686;
	%do yr = 1 %to 11;
		/* Year8 has no intermediate External folder; all other years do. */
		%if &yr = 8 %then %let sub = ;
		%else %let sub = External\;
		DATA work.&cat&yr;
			infile "J:\IRI Data\Academic Dataset External\Year&yr\&sub.&cat\&cat._groc_%scan(&weeks, &yr, %str( ))" firstobs=2;
			input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
		run;
	%end;
%mend read_groc_years;
%read_groc_years(cigets)

/* Stack the yearly files and build the composite numeric UPC key. */
DATA work.cigets (drop = SY GE VEND ITEM);
	set work.cigets1-work.cigets11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;
/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete cigets1-cigets11;
run;
quit;

/* Attach the calendar variables from perm.dates; keep weeks found in both. */
proc sort data = work.cigets;
	by week;
run;
data work.cigets_dates (drop = week_start week_end month qtr);
	merge work.cigets (in=in1) perm.dates (in=in2);
	by week;
	if in1 and in2;
/* explicit step boundary so the statements that follow are not compiled into this step */
run;
*****PROD CHAR*********;
/* Import one product-attribute workbook, drop rows with a blank L2, build the
   COLUPC key from SY/GE/VEND/ITEM and sort the result by that key.
   out      : WORK dataset to create
   xlsfile  : full path of the workbook
   sheet    : sheet name passed to PROC IMPORT
   keepvars : variables kept in the output dataset */
%macro load_prod(out, xlsfile, sheet, keepvars);
	PROC IMPORT OUT= WORK.&out
		DATAFILE= "&xlsfile"
		DBMS=EXCEL REPLACE; SHEET="&sheet"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES;
	run;
	data work.&out (keep = &keepvars);
		set work.&out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0) ; GE = input(GE,4.0) ; VEND = input(VEND,6.0) ;
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		/* same weights as the key built on the scanner data, so the two can be merged */
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
	proc sort data = work.&out;
		by colupc;
	run;
%mend load_prod;
/* NOTE(review): this file is named prod_cigs while the two later ones are prod_cigets - confirm the name on disk. */
%load_prod(prod6, J:\IRI Data\Academic Dataset External\parsed stub files\prod_cigs.xls, Sheet1$, product colupc vol_eq)
%load_prod(prod7, J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_cigets.xlsx, Sheet1$, product colupc vol_eq)
%load_prod(prod11, J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_cigets.xlsx, Sheet1$, product colupc vol_eq)

/* Split the dated scanner rows by year so each part is matched against the
   product file loaded for that period (prod6, prod7, prod11). */
data work.panel6 work.panel7 work.panel11;
	set work.cigets_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
/* Keep only scanner rows whose UPC is present in the matching product file. */
data work.panel_prod6;
	merge work.panel6 (in=has_sale) work.prod6 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod7;
	merge work.panel7 (in=has_sale) work.prod7 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod11;
	merge work.panel11 (in=has_sale) work.prod11 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
/* Recombine the three periods into one matched panel. */
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
*Unit sales;
/* Store-week volume: units times volume equivalent, over product-matched rows. */
proc sql;
	create table work.cigets_vol_sales as
		select iri_key, week, sum(vol_eq * units) as cigets_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* Drop the per-period intermediates. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11 prod6 prod7 prod11 panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales over all scanner rows in work.cigets. */
proc sql;
	create table work.cigets_revenue as
		select iri_key, week, sum(dollars) as cigets
		from work.cigets
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Attach the monthly CPI to every matched scanner row. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* The BY merge needs both inputs in monthid order; do not rely on the CSV order. */
proc sort data = work.cpi;
	by monthid;
run;
data work.cigets_panel_cpi;
	merge work.panel_prod (in=in_panel) work.cpi;
	by monthid;
	/* keep scanner rows only: CPI months with no sales would add rows with no UPC */
	if in_panel;
run;
/* Per-UPC mean of CPI-deflated dollars per (units * vol_eq). */
proc sql;
create table work.cigets_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.cigets_panel_cpi
group by colupc
order by colupc;
quit;
/* 25th, 50th and 75th percentiles of the UPC-level average price. */
proc univariate data=cigets_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* Constant key n = 1 lets the one-row percentile table merge onto every row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.cigets_dates;
	by colupc;
run;
/* Scanner rows for UPCs that have an entry in the price table. */
data work.cigets_dates_price;
	merge work.cigets_dates (in=in_sales) work.cigets_upc_price (in=in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
/* Assign each row the price quartile q (1 = cheapest) of its UPC. */
data work.cigets_dates_price;
	merge work.cigets_dates_price work.pctl;
	by n;
	/* SAS orders a missing value below every number, so an unpriced UPC would
	   satisfy avg_price < p25 and be counted in quartile 1; exclude it. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store, quarter and price quartile. */
proc sql;
create table work.cigets_count as
select iri_key, qtrid, q, count(distinct colupc) as cigets_count
from work.cigets_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* One row per store-quarter with columns cigets_count_<q>. */
proc transpose data=work.cigets_count out=work.cigets_count prefix=cigets_count_;
	by iri_key qtrid;
	id q;
	var cigets_count;
run;
/* A quartile with no UPC in a store-quarter is a count of zero, not missing. */
data work.cigets_count (drop = _NAME_ _k);
	set work.cigets_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
run;

proc datasets library = work;
	delete cigets cigets_dates cigets_dates_price pctl cigets_panel_cpi;
run;
quit;

*coffee;

/* Read the 11 yearly grocery scanner files of one category into
   work.<cat>1 - work.<cat>11 (one dataset per year, first record skipped).
   cat: category name as used in folder and file names. */
%macro read_groc_years(cat);
	%local yr sub weeks;
	/* Week-range suffix of each yearly file name, in order Year1 ... Year11. */
	%let weeks = 1114_1165 1166_1217 1218_1269 1270_1321 1322_1373 1374_1426 1427_1478 1479_1530 1531_1582 1583_1634 1635_1686;
	%do yr = 1 %to 11;
		/* Year8 has no intermediate External folder; all other years do. */
		%if &yr = 8 %then %let sub = ;
		%else %let sub = External\;
		DATA work.&cat&yr;
			infile "J:\IRI Data\Academic Dataset External\Year&yr\&sub.&cat\&cat._groc_%scan(&weeks, &yr, %str( ))" firstobs=2;
			input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
		run;
	%end;
%mend read_groc_years;
%read_groc_years(coffee)

/* Stack the yearly files and build the composite numeric UPC key. */
DATA work.coffee (drop = SY GE VEND ITEM);
	set work.coffee1-work.coffee11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;
/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete coffee1-coffee11;
run;
quit;

/* Attach the calendar variables from perm.dates; keep weeks found in both. */
proc sort data = work.coffee;
	by week;
run;
data work.coffee_dates (drop = week_start week_end month qtr);
	merge work.coffee (in=in1) perm.dates (in=in2);
	by week;
	if in1 and in2;
/* explicit step boundary so the statements that follow are not compiled into this step */
run;
*****PROD CHAR*********;
/* Import one product-attribute workbook, drop rows with a blank L2, build the
   COLUPC key from SY/GE/VEND/ITEM and sort the result by that key.
   out      : WORK dataset to create
   xlsfile  : full path of the workbook
   sheet    : sheet name passed to PROC IMPORT
   keepvars : variables kept in the output dataset */
%macro load_prod(out, xlsfile, sheet, keepvars);
	PROC IMPORT OUT= WORK.&out
		DATAFILE= "&xlsfile"
		DBMS=EXCEL REPLACE; SHEET="&sheet"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES;
	run;
	data work.&out (keep = &keepvars);
		set work.&out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0) ; GE = input(GE,4.0) ; VEND = input(VEND,6.0) ;
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		/* same weights as the key built on the scanner data, so the two can be merged */
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
	proc sort data = work.&out;
		by colupc;
	run;
%mend load_prod;
%load_prod(prod6, J:\IRI Data\Academic Dataset External\parsed stub files\prod_coffee.xls, Sheet1$, l2 product colupc vol_eq form package)
%load_prod(prod7, J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_coffee.xlsx, Sheet1$, l2 product colupc vol_eq form package)
%load_prod(prod11, J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_coffee.xlsx, Sheet1$, l2 product colupc vol_eq form package)

/* Split the dated scanner rows by year so each part is matched against the
   product file loaded for that period (prod6, prod7, prod11). */
data work.panel6 work.panel7 work.panel11;
	set work.coffee_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
/* Keep only scanner rows whose UPC is present in the matching product file. */
data work.panel_prod6;
	merge work.panel6 (in=has_sale) work.prod6 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod7;
	merge work.panel7 (in=has_sale) work.prod7 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
data work.panel_prod11;
	merge work.panel11 (in=has_sale) work.prod11 (in=has_attr);
	by colupc;
	if has_sale and has_attr;
run;
/* Recombine the three periods into one matched panel. */
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
/* Category-named copy of the matched panel. */
data work.coffee_panel;
	set work.panel_prod;
run;
*Unit sales;
/* Store-week volume: units times volume equivalent, over product-matched rows. */
proc sql;
	create table work.coffee_vol_sales as
		select iri_key, week, sum(vol_eq * units) as coffee_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* Drop the per-period intermediates. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11 prod6 prod7 prod11 panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales over all scanner rows in work.coffee. */
proc sql;
	create table work.coffee_revenue as
		select iri_key, week, sum(dollars) as coffee
		from work.coffee
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Step 1: attach the monthly CPI to every panel row.
   NOTE(review): work.cpi is merged as-is and no sort of it is visible here -
   confirm it is already in monthid order. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.coffee_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	/* Fix: keep panel rows only. Without this filter every CPI month that has
	   no sales contributes a row with a missing colupc to the price table. */
	if in_panel;
run;
/* Step 2: per UPC, the mean of (dollars/cpi*100) / (units*vol_eq). */
proc sql;
create table work.coffee_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.coffee_panel_cpi
group by colupc
order by colupc;
quit;
/* Step 3: 25th/50th/75th percentiles of the UPC-level average prices. */
proc univariate data=coffee_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* n = 1 is a constant key used to attach the single percentile row to every
   observation in step 5. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
/* Step 4: bring the UPC average price onto the dated rows (inner join). */
proc sort data = work.coffee_dates;
	by colupc;
run;
data work.coffee_dates_price;
	merge work.coffee_dates (in=in1) work.coffee_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
/* Step 5: price quartile q, 1 (below p25) through 4 (at or above p75). */
data work.coffee_dates_price;
	merge work.coffee_dates_price work.pctl;
	by n;
	/* Fix: SAS orders a missing value below every number, so a UPC without a
	   computable price used to satisfy avg_price < p25 and was counted in
	   quartile 1. Those rows are excluded instead. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
create table work.coffee_count as
select iri_key, qtrid, q, count(distinct colupc) as coffee_count
from work.coffee_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* Long to wide: one row per store-quarter, one column coffee_count_<q> per quartile. */
proc transpose data=work.coffee_count out=work.coffee_count prefix=coffee_count_;
	by iri_key qtrid;
	id q;
	var coffee_count;
run;
/* A quartile absent from a store-quarter means zero UPCs there.
   Fix: the array names the four quartile columns explicitly instead of
   _numeric_, so a missing iri_key/qtrid is no longer rewritten to 0, and a
   quartile column that never occurred is still created (filled with 0).
   The obsolete DO OVER loop is replaced by an indexed loop. */
data work.coffee_count (drop = _NAME_);
	set work.coffee_count;
	array qcount {4} coffee_count_1 - coffee_count_4;
	do _k = 1 to dim(qcount);
		if qcount{_k} = . then qcount{_k} = 0;
	end;
	drop _k;
run;

/* Drop the coffee working tables that are not needed downstream. */
proc datasets library = work;
	delete coffee coffee_dates coffee_dates_price pctl coffee_panel_cpi;
run;
quit;

*coldcer;

/* Raw coldcer scanner files, one DATA step per yearly file.
   FIRSTOBS=2 starts reading at the second record (presumably a header line is
   skipped); F is read as character, the other ten fields as numeric. */
data work.coldcer1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\coldcer\coldcer_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\coldcer\coldcer_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\coldcer\coldcer_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\coldcer\coldcer_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\coldcer\coldcer_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\coldcer\coldcer_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\coldcer\coldcer_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
/* The Year8 path has no \External\ level, unlike the other years. */
data work.coldcer8;
	infile "J:\IRI Data\Academic Dataset External\Year8\coldcer\coldcer_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\coldcer\coldcer_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\coldcer\coldcer_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.coldcer11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\coldcer\coldcer_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

/* Stack the eleven yearly tables and collapse SY/GE/VEND/ITEM into the single
   numeric key colupc; the four parts are dropped from the output. */
data work.coldcer (drop = SY GE VEND ITEM);
	set work.coldcer1-work.coldcer11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed. */
proc datasets library = work;
	delete coldcer1-coldcer11;
run;
quit;

/* Attach the calendar variables from perm.dates by week; only weeks present
   on both sides are kept. */
proc sort data = work.coldcer;
	by week;
run;
data work.coldcer_dates (drop = week_start week_end month qtr);
	merge work.coldcer (in = in_sales) perm.dates (in = in_dates);
	by week;
	if in_sales and in_dates;
run;
/* ---- Product characteristics for coldcer ----
   Three stub files are imported (prod6, prod7, prod11). Each one is filtered to
   rows with a non-blank L2 and given the numeric UPC key COLUPC. PARENT and
   VENDOR are assigned but not listed in KEEP=, so they are not written out.
   NOTE(review): the first two files are named prod_cereal while the third is
   prod11_coldcer - confirm these are the intended files.
   NOTE(review): SY/GE/VEND/ITEM are assigned back onto themselves, so their
   type stays whatever PROC IMPORT chose - check the log for conversion notes. */
proc import out = WORK.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_cereal.xls"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

proc import out = WORK.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_cereal.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

proc import out = WORK.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_coldcer.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

/* Order the three product tables by colupc for the match-merges below. */
proc sort data = work.prod6;  by colupc; run;
proc sort data = work.prod7;  by colupc; run;
proc sort data = work.prod11; by colupc; run;

/* Route every dated coldcer row to the panel that pairs with its product table:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11. */
data work.panel6 work.panel7 work.panel11;
	set work.coldcer_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		when (year > 7) output work.panel11;
		otherwise;
	end;
run;

proc sort data = work.panel6;  by colupc; run;
proc sort data = work.panel7;  by colupc; run;
proc sort data = work.panel11; by colupc; run;

/* Inner joins on colupc: a row survives only when it exists on both sides. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched panels, then keep a coldcer-named copy. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
data work.coldcer_panel;
	set work.panel_prod;
run;
/* Volume sales per store (iri_key) and week: sum of vol_eq * units over the
   product-matched panel. */
proc sql;
	create table work.coldcer_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as coldcer_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the per-vintage intermediates; work.panel_prod itself is kept. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Dollar revenue per store and week, taken from the full work.coldcer table. */
proc sql;
	create table work.coldcer_revenue as
		select iri_key,
		       week,
		       sum(dollars) as coldcer
		from work.coldcer
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Step 1: attach the monthly CPI to every panel row.
   NOTE(review): work.cpi is merged as-is and no sort of it is visible here -
   confirm it is already in monthid order. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.coldcer_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	/* Fix: keep panel rows only. Without this filter every CPI month that has
	   no sales contributes a row with a missing colupc to the price table. */
	if in_panel;
run;
/* Step 2: per UPC, the mean of (dollars/cpi*100) / (units*vol_eq). */
proc sql;
create table work.coldcer_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.coldcer_panel_cpi
group by colupc
order by colupc;
quit;
/* Step 3: 25th/50th/75th percentiles of the UPC-level average prices. */
proc univariate data=coldcer_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* n = 1 is a constant key used to attach the single percentile row to every
   observation in step 5. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
/* Step 4: bring the UPC average price onto the dated rows (inner join). */
proc sort data = work.coldcer_dates;
	by colupc;
run;
data work.coldcer_dates_price;
	merge work.coldcer_dates (in=in1) work.coldcer_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
/* Step 5: price quartile q, 1 (below p25) through 4 (at or above p75). */
data work.coldcer_dates_price;
	merge work.coldcer_dates_price work.pctl;
	by n;
	/* Fix: SAS orders a missing value below every number, so a UPC without a
	   computable price used to satisfy avg_price < p25 and was counted in
	   quartile 1. Those rows are excluded instead. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
create table work.coldcer_count as
select iri_key, qtrid, q, count(distinct colupc) as coldcer_count
from work.coldcer_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* Long to wide: one row per store-quarter, one column coldcer_count_<q> per quartile. */
proc transpose data=work.coldcer_count out=work.coldcer_count prefix=coldcer_count_;
	by iri_key qtrid;
	id q;
	var coldcer_count;
run;
/* A quartile absent from a store-quarter means zero UPCs there.
   Fix: the array names the four quartile columns explicitly instead of
   _numeric_, so a missing iri_key/qtrid is no longer rewritten to 0, and a
   quartile column that never occurred is still created (filled with 0).
   The obsolete DO OVER loop is replaced by an indexed loop. */
data work.coldcer_count (drop = _NAME_);
	set work.coldcer_count;
	array qcount {4} coldcer_count_1 - coldcer_count_4;
	do _k = 1 to dim(qcount);
		if qcount{_k} = . then qcount{_k} = 0;
	end;
	drop _k;
run;

/* Drop the coldcer working tables that are not needed downstream. */
proc datasets library = work;
	delete coldcer coldcer_dates coldcer_dates_price pctl coldcer_panel_cpi;
run;
quit;

*deod;

/* Raw deod scanner files, one DATA step per yearly file.
   FIRSTOBS=2 starts reading at the second record (presumably a header line is
   skipped); F is read as character, the other ten fields as numeric. */
data work.deod1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\deod\deod_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\deod\deod_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\deod\deod_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\deod\deod_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\deod\deod_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\deod\deod_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\deod\deod_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
/* The Year8 path has no \External\ level, unlike the other years. */
data work.deod8;
	infile "J:\IRI Data\Academic Dataset External\Year8\deod\deod_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\deod\deod_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\deod\deod_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.deod11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\deod\deod_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

/* Stack the eleven yearly tables and collapse SY/GE/VEND/ITEM into the single
   numeric key colupc; the four parts are dropped from the output. */
data work.deod (drop = SY GE VEND ITEM);
	set work.deod1-work.deod11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed. */
proc datasets library = work;
	delete deod1-deod11;
run;
quit;

/* Attach the calendar variables from perm.dates by week; only weeks present
   on both sides are kept. */
proc sort data = work.deod;
	by week;
run;
data work.deod_dates (drop = week_start week_end month qtr);
	merge work.deod (in = in_sales) perm.dates (in = in_dates);
	by week;
	if in_sales and in_dates;
run;
/* ---- Product characteristics for deod ----
   Three stub files are imported (prod6, prod7, prod11). Each one is filtered to
   rows with a non-blank L2 and given the numeric UPC key COLUPC. PARENT and
   VENDOR are assigned but not listed in KEEP=, so they are not written out.
   NOTE(review): SY/GE/VEND/ITEM are assigned back onto themselves, so their
   type stays whatever PROC IMPORT chose - check the log for conversion notes. */
proc import out = WORK.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_deod.xls"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

proc import out = WORK.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_deod.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

proc import out = WORK.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_deod.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

/* Order the three product tables by colupc for the match-merges below. */
proc sort data = work.prod6;  by colupc; run;
proc sort data = work.prod7;  by colupc; run;
proc sort data = work.prod11; by colupc; run;

/* Route every dated deod row to the panel that pairs with its product table:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11. */
data work.panel6 work.panel7 work.panel11;
	set work.deod_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		when (year > 7) output work.panel11;
		otherwise;
	end;
run;

proc sort data = work.panel6;  by colupc; run;
proc sort data = work.panel7;  by colupc; run;
proc sort data = work.panel11; by colupc; run;

/* Inner joins on colupc: a row survives only when it exists on both sides. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched panels into one table. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
/* Volume sales per store (iri_key) and week: sum of vol_eq * units over the
   product-matched panel. */
proc sql;
	create table work.deod_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as deod_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the per-vintage intermediates; work.panel_prod itself is kept. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Dollar revenue per store and week, taken from the full work.deod table. */
proc sql;
	create table work.deod_revenue as
		select iri_key,
		       week,
		       sum(dollars) as deod
		from work.deod
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Step 1: attach the monthly CPI to every panel row.
   NOTE(review): work.cpi is merged as-is and no sort of it is visible here -
   confirm it is already in monthid order. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.deod_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	/* Fix: keep panel rows only. Without this filter every CPI month that has
	   no sales contributes a row with a missing colupc to the price table. */
	if in_panel;
run;
/* Step 2: per UPC, the mean of (dollars/cpi*100) / (units*vol_eq). */
proc sql;
create table work.deod_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.deod_panel_cpi
group by colupc
order by colupc;
quit;
/* Step 3: 25th/50th/75th percentiles of the UPC-level average prices. */
proc univariate data=deod_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* n = 1 is a constant key used to attach the single percentile row to every
   observation in step 5. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
/* Step 4: bring the UPC average price onto the dated rows (inner join). */
proc sort data = work.deod_dates;
	by colupc;
run;
data work.deod_dates_price;
	merge work.deod_dates (in=in1) work.deod_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
/* Step 5: price quartile q, 1 (below p25) through 4 (at or above p75). */
data work.deod_dates_price;
	merge work.deod_dates_price work.pctl;
	by n;
	/* Fix: SAS orders a missing value below every number, so a UPC without a
	   computable price used to satisfy avg_price < p25 and was counted in
	   quartile 1. Those rows are excluded instead. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
create table work.deod_count as
select iri_key, qtrid, q, count(distinct colupc) as deod_count
from work.deod_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* Long to wide: one row per store-quarter, one column deod_count_<q> per quartile. */
proc transpose data=work.deod_count out=work.deod_count prefix=deod_count_;
	by iri_key qtrid;
	id q;
	var deod_count;
run;
/* A quartile absent from a store-quarter means zero UPCs there.
   Fix: the array names the four quartile columns explicitly instead of
   _numeric_, so a missing iri_key/qtrid is no longer rewritten to 0, and a
   quartile column that never occurred is still created (filled with 0).
   The obsolete DO OVER loop is replaced by an indexed loop. */
data work.deod_count (drop = _NAME_);
	set work.deod_count;
	array qcount {4} deod_count_1 - deod_count_4;
	do _k = 1 to dim(qcount);
		if qcount{_k} = . then qcount{_k} = 0;
	end;
	drop _k;
run;

/* Drop the deod working tables that are not needed downstream. */
proc datasets library = work;
	delete deod deod_dates deod_dates_price pctl deod_panel_cpi;
run;
quit;

*diapers;

/* Raw diapers scanner files, one DATA step per yearly file.
   FIRSTOBS=2 starts reading at the second record (presumably a header line is
   skipped); F is read as character, the other ten fields as numeric. */
data work.diapers1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\diapers\diapers_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\diapers\diapers_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\diapers\diapers_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\diapers\diapers_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\diapers\diapers_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\diapers\diapers_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\diapers\diapers_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
/* The Year8 path has no \External\ level, unlike the other years. */
data work.diapers8;
	infile "J:\IRI Data\Academic Dataset External\Year8\diapers\diapers_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\diapers\diapers_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\diapers\diapers_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.diapers11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\diapers\diapers_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

/* Stack the eleven yearly tables and collapse SY/GE/VEND/ITEM into the single
   numeric key colupc; the four parts are dropped from the output. */
data work.diapers (drop = SY GE VEND ITEM);
	set work.diapers1-work.diapers11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed. */
proc datasets library = work;
	delete diapers1-diapers11;
run;
quit;

/* Attach the calendar variables from perm.dates by week; only weeks present
   on both sides are kept. */
proc sort data = work.diapers;
	by week;
run;
data work.diapers_dates (drop = week_start week_end month qtr);
	merge work.diapers (in = in_sales) perm.dates (in = in_dates);
	by week;
	if in_sales and in_dates;
run;
/* ---- Product characteristics for diapers ----
   Three stub files are imported (prod6, prod7, prod11). Each one is filtered to
   rows with a non-blank L2 and given the numeric UPC key COLUPC. PARENT and
   VENDOR are assigned but not listed in KEEP=, so they are not written out.
   NOTE(review): the third file is named prod11_diaper (singular) while the
   first two use prod_diapers - confirm the file name.
   NOTE(review): SY/GE/VEND/ITEM are assigned back onto themselves, so their
   type stays whatever PROC IMPORT chose - check the log for conversion notes. */
proc import out = WORK.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_diapers.xls"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

proc import out = WORK.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_diapers.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

proc import out = WORK.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_diaper.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

/* Order the three product tables by colupc for the match-merges below. */
proc sort data = work.prod6;  by colupc; run;
proc sort data = work.prod7;  by colupc; run;
proc sort data = work.prod11; by colupc; run;

/* Route every dated diapers row to the panel that pairs with its product table:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11. */
data work.panel6 work.panel7 work.panel11;
	set work.diapers_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		when (year > 7) output work.panel11;
		otherwise;
	end;
run;

proc sort data = work.panel6;  by colupc; run;
proc sort data = work.panel7;  by colupc; run;
proc sort data = work.panel11; by colupc; run;

/* Inner joins on colupc: a row survives only when it exists on both sides. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched panels into one table. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
/* Volume sales per store (iri_key) and week: sum of vol_eq * units over the
   product-matched panel. */
proc sql;
	create table work.diapers_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as diapers_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the per-vintage intermediates; work.panel_prod itself is kept. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Dollar revenue per store and week, taken from the full work.diapers table. */
proc sql;
	create table work.diapers_revenue as
		select iri_key,
		       week,
		       sum(dollars) as diapers
		from work.diapers
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Step 1: attach the monthly CPI to every panel row.
   NOTE(review): work.cpi is merged as-is and no sort of it is visible here -
   confirm it is already in monthid order. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.diapers_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	/* Fix: keep panel rows only. Without this filter every CPI month that has
	   no sales contributes a row with a missing colupc to the price table. */
	if in_panel;
run;
/* Step 2: per UPC, the mean of (dollars/cpi*100) / (units*vol_eq). */
proc sql;
create table work.diapers_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.diapers_panel_cpi
group by colupc
order by colupc;
quit;
/* Step 3: 25th/50th/75th percentiles of the UPC-level average prices. */
proc univariate data=diapers_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* n = 1 is a constant key used to attach the single percentile row to every
   observation in step 5. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
/* Step 4: bring the UPC average price onto the dated rows (inner join). */
proc sort data = work.diapers_dates;
	by colupc;
run;
data work.diapers_dates_price;
	merge work.diapers_dates (in=in1) work.diapers_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
/* Step 5: price quartile q, 1 (below p25) through 4 (at or above p75). */
data work.diapers_dates_price;
	merge work.diapers_dates_price work.pctl;
	by n;
	/* Fix: SAS orders a missing value below every number, so a UPC without a
	   computable price used to satisfy avg_price < p25 and was counted in
	   quartile 1. Those rows are excluded instead. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
create table work.diapers_count as
select iri_key, qtrid, q, count(distinct colupc) as diapers_count
from work.diapers_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* Long to wide: one row per store-quarter, one column diapers_count_<q> per quartile. */
proc transpose data=work.diapers_count out=work.diapers_count prefix=diapers_count_;
	by iri_key qtrid;
	id q;
	var diapers_count;
run;
/* A quartile absent from a store-quarter means zero UPCs there.
   Fix: the array names the four quartile columns explicitly instead of
   _numeric_, so a missing iri_key/qtrid is no longer rewritten to 0, and a
   quartile column that never occurred is still created (filled with 0).
   The obsolete DO OVER loop is replaced by an indexed loop. */
data work.diapers_count (drop = _NAME_);
	set work.diapers_count;
	array qcount {4} diapers_count_1 - diapers_count_4;
	do _k = 1 to dim(qcount);
		if qcount{_k} = . then qcount{_k} = 0;
	end;
	drop _k;
run;

/* Drop the diapers working tables that are not needed downstream. */
proc datasets library = work;
	delete diapers diapers_dates diapers_dates_price pctl diapers_panel_cpi;
run;
quit;

*factiss;

/* Raw factiss scanner files, one DATA step per yearly file.
   FIRSTOBS=2 starts reading at the second record (presumably a header line is
   skipped); F is read as character, the other ten fields as numeric. */
data work.factiss1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\factiss\factiss_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\factiss\factiss_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\factiss\factiss_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\factiss\factiss_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\factiss\factiss_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\factiss\factiss_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\factiss\factiss_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
/* The Year8 path has no \External\ level, unlike the other years. */
data work.factiss8;
	infile "J:\IRI Data\Academic Dataset External\Year8\factiss\factiss_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\factiss\factiss_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\factiss\factiss_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.factiss11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\factiss\factiss_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

/* Stack the eleven yearly tables and collapse SY/GE/VEND/ITEM into the single
   numeric key colupc; the four parts are dropped from the output. */
data work.factiss (drop = SY GE VEND ITEM);
	set work.factiss1-work.factiss11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed. */
proc datasets library = work;
	delete factiss1-factiss11;
run;
quit;

/* Attach the calendar variables from perm.dates by week; only weeks present
   on both sides are kept. */
proc sort data = work.factiss;
	by week;
run;
data work.factiss_dates (drop = week_start week_end month qtr);
	merge work.factiss (in = in_sales) perm.dates (in = in_dates);
	by week;
	if in_sales and in_dates;
run;
/* ---- Product characteristics for factiss ----
   Three stub files are imported (prod6, prod7, prod11). Each one is filtered to
   rows with a non-blank L2 and given the numeric UPC key COLUPC. PARENT and
   VENDOR are assigned but not listed in KEEP=, so they are not written out.
   NOTE(review): the first file is named prod_tissue while the other two use
   factiss in the name - confirm these are the intended files.
   NOTE(review): SY/GE/VEND/ITEM are assigned back onto themselves, so their
   type stays whatever PROC IMPORT chose - check the log for conversion notes. */
proc import out = WORK.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_tissue.xls"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

proc import out = WORK.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_factiss.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

proc import out = WORK.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_factiss.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if L2 ne " ";
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY,2.0);
	GE = input(GE,4.0);
	VEND = input(VEND,6.0);
	ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
run;

/* Order the three product tables by colupc for the match-merges below. */
proc sort data = work.prod6;  by colupc; run;
proc sort data = work.prod7;  by colupc; run;
proc sort data = work.prod11; by colupc; run;

/* Route every dated factiss row to the panel that pairs with its product table:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11. */
data work.panel6 work.panel7 work.panel11;
	set work.factiss_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		when (year > 7) output work.panel11;
		otherwise;
	end;
run;

proc sort data = work.panel6;  by colupc; run;
proc sort data = work.panel7;  by colupc; run;
proc sort data = work.panel11; by colupc; run;

/* Inner joins on colupc: a row survives only when it exists on both sides. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched panels into one table. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
/* Volume sales per store (iri_key) and week: sum of vol_eq * units over the
   product-matched panel. */
proc sql;
	create table work.factiss_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as factiss_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the per-vintage intermediates; work.panel_prod itself is kept. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Dollar revenue per store and week, taken from the full work.factiss table. */
proc sql;
	create table work.factiss_revenue as
		select iri_key,
		       week,
		       sum(dollars) as factiss
		from work.factiss
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Step 1: attach the monthly CPI to every panel row.
   NOTE(review): work.cpi is merged as-is and no sort of it is visible here -
   confirm it is already in monthid order. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.factiss_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	/* Fix: keep panel rows only. Without this filter every CPI month that has
	   no sales contributes a row with a missing colupc to the price table. */
	if in_panel;
run;
/* Step 2: per UPC, the mean of (dollars/cpi*100) / (units*vol_eq). */
proc sql;
create table work.factiss_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.factiss_panel_cpi
group by colupc
order by colupc;
quit;
/* Step 3: 25th/50th/75th percentiles of the UPC-level average prices. */
proc univariate data=factiss_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
/* n = 1 is a constant key used to attach the single percentile row to every
   observation in step 5. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
/* Step 4: bring the UPC average price onto the dated rows (inner join). */
proc sort data = work.factiss_dates;
	by colupc;
run;
data work.factiss_dates_price;
	merge work.factiss_dates (in=in1) work.factiss_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
/* Step 5: price quartile q, 1 (below p25) through 4 (at or above p75). */
data work.factiss_dates_price;
	merge work.factiss_dates_price work.pctl;
	by n;
	/* Fix: SAS orders a missing value below every number, so a UPC without a
	   computable price used to satisfy avg_price < p25 and was counted in
	   quartile 1. Those rows are excluded instead. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
create table work.factiss_count as
select iri_key, qtrid, q, count(distinct colupc) as factiss_count
from work.factiss_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
/* Long to wide: one row per store-quarter, one column factiss_count_<q> per quartile. */
proc transpose data=work.factiss_count out=work.factiss_count prefix=factiss_count_;
	by iri_key qtrid;
	id q;
	var factiss_count;
run;
/* A quartile absent from a store-quarter means zero UPCs there.
   Fix: the array names the four quartile columns explicitly instead of
   _numeric_, so a missing iri_key/qtrid is no longer rewritten to 0, and a
   quartile column that never occurred is still created (filled with 0).
   The obsolete DO OVER loop is replaced by an indexed loop. */
data work.factiss_count (drop = _NAME_);
	set work.factiss_count;
	array qcount {4} factiss_count_1 - factiss_count_4;
	do _k = 1 to dim(qcount);
		if qcount{_k} = . then qcount{_k} = 0;
	end;
	drop _k;
run;

/* Drop the factiss working tables that are not needed downstream. */
proc datasets library = work;
	delete factiss factiss_dates factiss_dates_price pctl factiss_panel_cpi;
run;
quit;

*fzdinent;

/* fzdinent -- raw weekly scanner data.                                       */
/* Eleven yearly files are read with list input; the first line of each file */
/* is skipped (firstobs=2). The Year8 file is read from Year8\ directly,     */
/* without the External\ level used for the other years.                     */
data work.fzdinent1;
    infile "J:\IRI Data\Academic Dataset External\Year1\External\fzdinent\fzdinent_groc_1114_1165" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent2;
    infile "J:\IRI Data\Academic Dataset External\Year2\External\fzdinent\fzdinent_groc_1166_1217" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent3;
    infile "J:\IRI Data\Academic Dataset External\Year3\External\fzdinent\fzdinent_groc_1218_1269" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent4;
    infile "J:\IRI Data\Academic Dataset External\Year4\External\fzdinent\fzdinent_groc_1270_1321" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent5;
    infile "J:\IRI Data\Academic Dataset External\Year5\External\fzdinent\fzdinent_groc_1322_1373" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent6;
    infile "J:\IRI Data\Academic Dataset External\Year6\External\fzdinent\fzdinent_groc_1374_1426" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent7;
    infile "J:\IRI Data\Academic Dataset External\Year7\External\fzdinent\fzdinent_groc_1427_1478" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent8;
    infile "J:\IRI Data\Academic Dataset External\Year8\fzdinent\fzdinent_groc_1479_1530" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent9;
    infile "J:\IRI Data\Academic Dataset External\Year9\External\fzdinent\fzdinent_groc_1531_1582" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent10;
    infile "J:\IRI Data\Academic Dataset External\Year10\External\fzdinent\fzdinent_groc_1583_1634" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzdinent11;
    infile "J:\IRI Data\Academic Dataset External\Year11\External\fzdinent\fzdinent_groc_1635_1686" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and collapse the four UPC parts into one numeric key. */
data work.fzdinent (drop = SY GE VEND ITEM);
    set work.fzdinent1-work.fzdinent11;
    format colupc best18.;
    colupc = sy * 100000000000
           + ge * 10000000000
           + vend * 100000
           + item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
    delete fzdinent1-fzdinent11;
run;
quit;

/* Add the variables of perm.dates to every record, matching on week.        */
/* perm.dates is merged as stored -- assumes it is ordered by week, TODO confirm. */
proc sort data = work.fzdinent;
    by week;
run;

data work.fzdinent_dates (drop = week_start week_end month qtr);
    merge work.fzdinent (in = in_sales)
          perm.dates    (in = in_dates);
    by week;
    /* Same filter as "in_sales = in_dates": a merged row always comes from at least one input. */
    if in_sales and in_dates;
run;
/* ===== fzdinent: product characteristics ================================== */
/* Three stub workbooks are imported (Sheet1 of each). Each is reduced to    */
/* the kept attributes plus COLUPC, built with the same formula as in the    */
/* scanner data. Below, prod6 is matched to year < 7, prod7 to year = 7 and  */
/* prod11 to year > 7. PARENT and VENDOR are assigned but not in the KEEP=   */
/* list.                                                                     */
/* NOTE(review): the UPC parts are passed through INPUT(); this presumably   */
/* expects them to be imported as text -- confirm against the workbooks.     */
proc import out = work.prod6
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_fzdin.xls"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq product_type size form l2);
    set work.prod6;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod7
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_fzdin.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq product_type size form l2);
    set work.prod7;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod11
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_fzdinent.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq product_type size form l2);
    set work.prod11;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

/* Product tables in UPC order for the merges below. */
proc sort data = work.prod6;
    by colupc;
run;
proc sort data = work.prod7;
    by colupc;
run;
proc sort data = work.prod11;
    by colupc;
run;

/* Split the dated panel by year so each slice meets its own product table. */
data work.panel6 work.panel7 work.panel11;
    set work.fzdinent_dates;
    if year < 7 then output work.panel6;
    else if year = 7 then output work.panel7;
    else output work.panel11;
run;

proc sort data = work.panel6;
    by colupc;
run;
proc sort data = work.panel7;
    by colupc;
run;
proc sort data = work.panel11;
    by colupc;
run;

/* Keep only UPCs present in both the panel slice and its product table. */
data work.panel_prod6;
    merge work.panel6 (in = in_sales)
          work.prod6  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod7;
    merge work.panel7 (in = in_sales)
          work.prod7  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod11;
    merge work.panel11 (in = in_sales)
          work.prod11  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod;
    set work.panel_prod6
        work.panel_prod7
        work.panel_prod11;
run;

/* Category-named copy of the matched panel. */
data work.fzdinent_panel;
    set work.panel_prod;
run;
/* fzdinent volume sales: sum of vol_eq * units per iri_key and week,        */
/* computed on the records that matched a product table (work.panel_prod).   */
proc sql;
    create table work.fzdinent_vol_sales as
        select iri_key,
               week,
               sum(vol_eq * units) as fzdinent_vol_sales
        from work.panel_prod
        group by iri_key, week
        order by iri_key, week;
quit;

/* Remove the per-slice work tables; work.panel_prod is kept. */
proc datasets library = work;
    delete panel_prod6 panel_prod7 panel_prod11
           prod6 prod7 prod11
           panel6 panel7 panel11;
run;
quit;

/* fzdinent revenue: sum of dollars per iri_key and week, taken from the     */
/* full work.fzdinent table rather than from the matched panel.              */
proc sql;
    create table work.fzdinent_revenue as
        select iri_key,
               week,
               sum(dollars) as fzdinent
        from work.fzdinent
        group by iri_key, week
        order by iri_key, week;
quit;
/*
 * fzdinent: number of distinct UPCs per iri_key and qtrid, by price quartile.
 *
 * Steps
 *   1. Join the monthly CPI to work.panel_prod (by monthid) and average, per
 *      UPC, the deflated price per volume-equivalent unit:
 *      (dollars / cpi * 100) / (units * vol_eq).
 *   2. Take the 25th, 50th and 75th percentiles of those UPC averages.
 *   3. Give every record of work.fzdinent_dates its UPC's quartile q (1-4)
 *      and count distinct UPCs by iri_key, qtrid and q.
 *   4. Transpose to one row per iri_key/qtrid with columns fzdinent_count_<q>
 *      and turn missing counts into zeros.
 *
 * Fixes in this version
 *   - Records whose avg_price is missing are excluded from the quartile
 *     assignment. SAS orders missing values below every number, so the former
 *     test "avg_price < p25" counted such UPCs in quartile 1 although
 *     PROC UNIVARIATE computes the percentiles without them.
 *   - The CPI join keeps only months that occur in work.panel_prod; CPI-only
 *     months used to add rows with a missing colupc.
 *   - Every step is closed explicitly and the legacy DO OVER loop is replaced
 *     by an indexed array loop.
 *
 * Assumes work.cpi is already ordered by monthid -- TODO confirm.
 */
proc sort data = work.panel_prod;
    by monthid;
run;

data work.fzdinent_panel_cpi;
    merge work.panel_prod (in = in_panel)
          work.cpi;
    by monthid;
    if in_panel;    /* drop CPI months that have no sales records */
run;

proc sql;
    create table work.fzdinent_upc_price as
        select colupc,
               mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
        from work.fzdinent_panel_cpi
        group by colupc
        order by colupc;
quit;

proc univariate data = work.fzdinent_upc_price noprint;
    var avg_price;
    output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;

/* Constant key n = 1 lets the single percentile row be merged onto every record. */
data work.pctl;
    set work.pctl;
    n = 1;
run;

proc sort data = work.fzdinent_dates;
    by colupc;
run;

data work.fzdinent_dates_price;
    merge work.fzdinent_dates     (in = in_sales)
          work.fzdinent_upc_price (in = in_price);
    by colupc;
    if in_sales and in_price;
    n = 1;
run;

data work.fzdinent_dates_price;
    merge work.fzdinent_dates_price work.pctl;
    by n;
    /* A missing price compares as smaller than p25; keep it out of quartile 1. */
    if missing(avg_price) then delete;
    if avg_price < p25 then q = 1;
    else if avg_price < p50 then q = 2;
    else if avg_price < p75 then q = 3;
    else q = 4;
run;

proc sql;
    create table work.fzdinent_count as
        select iri_key,
               qtrid,
               q,
               count(distinct colupc) as fzdinent_count
        from work.fzdinent_dates_price
        group by iri_key, qtrid, q
        order by iri_key, qtrid, q;
quit;

proc transpose data = work.fzdinent_count
               out = work.fzdinent_count
               prefix = fzdinent_count_;
    by iri_key qtrid;
    id q;
    var fzdinent_count;
run;

/* Quartiles absent from a store-quarter come out of the transpose as missing; report them as 0. */
data work.fzdinent_count (drop = _NAME_ _i);
    set work.fzdinent_count;
    array counts {*} _numeric_;
    do _i = 1 to dim(counts);
        if missing(counts{_i}) then counts{_i} = 0;
    end;
run;

proc datasets library = work nolist;
    delete fzdinent fzdinent_dates fzdinent_dates_price pctl fzdinent_panel_cpi;
run;
quit;

*fzpizza;

/* fzpizza -- raw weekly scanner data.                                        */
/* Eleven yearly files are read with list input; the first line of each file */
/* is skipped (firstobs=2). The Year8 file is read from Year8\ directly,     */
/* without the External\ level used for the other years.                     */
data work.fzpizza1;
    infile "J:\IRI Data\Academic Dataset External\Year1\External\fzpizza\fzpizza_groc_1114_1165" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza2;
    infile "J:\IRI Data\Academic Dataset External\Year2\External\fzpizza\fzpizza_groc_1166_1217" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza3;
    infile "J:\IRI Data\Academic Dataset External\Year3\External\fzpizza\fzpizza_groc_1218_1269" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza4;
    infile "J:\IRI Data\Academic Dataset External\Year4\External\fzpizza\fzpizza_groc_1270_1321" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza5;
    infile "J:\IRI Data\Academic Dataset External\Year5\External\fzpizza\fzpizza_groc_1322_1373" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza6;
    infile "J:\IRI Data\Academic Dataset External\Year6\External\fzpizza\fzpizza_groc_1374_1426" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza7;
    infile "J:\IRI Data\Academic Dataset External\Year7\External\fzpizza\fzpizza_groc_1427_1478" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza8;
    infile "J:\IRI Data\Academic Dataset External\Year8\fzpizza\fzpizza_groc_1479_1530" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza9;
    infile "J:\IRI Data\Academic Dataset External\Year9\External\fzpizza\fzpizza_groc_1531_1582" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza10;
    infile "J:\IRI Data\Academic Dataset External\Year10\External\fzpizza\fzpizza_groc_1583_1634" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.fzpizza11;
    infile "J:\IRI Data\Academic Dataset External\Year11\External\fzpizza\fzpizza_groc_1635_1686" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and collapse the four UPC parts into one numeric key. */
data work.fzpizza (drop = SY GE VEND ITEM);
    set work.fzpizza1-work.fzpizza11;
    format colupc best18.;
    colupc = sy * 100000000000
           + ge * 10000000000
           + vend * 100000
           + item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
    delete fzpizza1-fzpizza11;
run;
quit;

/* Add the variables of perm.dates to every record, matching on week.        */
/* perm.dates is merged as stored -- assumes it is ordered by week, TODO confirm. */
proc sort data = work.fzpizza;
    by week;
run;

data work.fzpizza_dates (drop = week_start week_end month qtr);
    merge work.fzpizza (in = in_sales)
          perm.dates   (in = in_dates);
    by week;
    /* Same filter as "in_sales = in_dates": a merged row always comes from at least one input. */
    if in_sales and in_dates;
run;
/* ===== fzpizza: product characteristics =================================== */
/* Three stub workbooks are imported (Sheet1 of each). Each is reduced to    */
/* the kept attributes plus COLUPC, built with the same formula as in the    */
/* scanner data. Below, prod6 is matched to year < 7, prod7 to year = 7 and  */
/* prod11 to year > 7.                                                       */
/* NOTE(review): the UPC parts are passed through INPUT(); this presumably   */
/* expects them to be imported as text -- confirm against the workbooks.     */
proc import out = work.prod6
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_fpizza.xls"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq parent vendor product_type package);
    set work.prod6;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod7
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_fpizza.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq parent vendor product_type package);
    set work.prod7;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod11
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_fzpizza.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq parent vendor product_type package);
    set work.prod11;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

/* Product tables in UPC order for the merges below. */
proc sort data = work.prod6;
    by colupc;
run;
proc sort data = work.prod7;
    by colupc;
run;
proc sort data = work.prod11;
    by colupc;
run;

/* Split the dated panel by year so each slice meets its own product table. */
data work.panel6 work.panel7 work.panel11;
    set work.fzpizza_dates;
    if year < 7 then output work.panel6;
    else if year = 7 then output work.panel7;
    else output work.panel11;
run;

proc sort data = work.panel6;
    by colupc;
run;
proc sort data = work.panel7;
    by colupc;
run;
proc sort data = work.panel11;
    by colupc;
run;

/* Keep only UPCs present in both the panel slice and its product table. */
data work.panel_prod6;
    merge work.panel6 (in = in_sales)
          work.prod6  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod7;
    merge work.panel7 (in = in_sales)
          work.prod7  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod11;
    merge work.panel11 (in = in_sales)
          work.prod11  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod;
    set work.panel_prod6
        work.panel_prod7
        work.panel_prod11;
run;

/* Category-named copy of the matched panel. */
data work.fzpizza_panel;
    set work.panel_prod;
run;
/* fzpizza volume sales: sum of vol_eq * units per iri_key and week,         */
/* computed on the records that matched a product table (work.panel_prod).   */
proc sql;
    create table work.fzpizza_vol_sales as
        select iri_key,
               week,
               sum(vol_eq * units) as fzpizza_vol_sales
        from work.panel_prod
        group by iri_key, week
        order by iri_key, week;
quit;

/* Remove the per-slice work tables; work.panel_prod is kept. */
proc datasets library = work;
    delete panel_prod6 panel_prod7 panel_prod11
           prod6 prod7 prod11
           panel6 panel7 panel11;
run;
quit;

/* fzpizza revenue: sum of dollars per iri_key and week, taken from the      */
/* full work.fzpizza table rather than from the matched panel.               */
proc sql;
    create table work.fzpizza_revenue as
        select iri_key,
               week,
               sum(dollars) as fzpizza
        from work.fzpizza
        group by iri_key, week
        order by iri_key, week;
quit;
/*
 * fzpizza: number of distinct UPCs per iri_key and qtrid, by price quartile.
 *
 * Steps
 *   1. Join the monthly CPI to work.panel_prod (by monthid) and average, per
 *      UPC, the deflated price per volume-equivalent unit:
 *      (dollars / cpi * 100) / (units * vol_eq).
 *   2. Take the 25th, 50th and 75th percentiles of those UPC averages.
 *   3. Give every record of work.fzpizza_dates its UPC's quartile q (1-4)
 *      and count distinct UPCs by iri_key, qtrid and q.
 *   4. Transpose to one row per iri_key/qtrid with columns fzpizza_count_<q>
 *      and turn missing counts into zeros.
 *
 * Fixes in this version
 *   - Records whose avg_price is missing are excluded from the quartile
 *     assignment. SAS orders missing values below every number, so the former
 *     test "avg_price < p25" counted such UPCs in quartile 1 although
 *     PROC UNIVARIATE computes the percentiles without them.
 *   - The CPI join keeps only months that occur in work.panel_prod; CPI-only
 *     months used to add rows with a missing colupc.
 *   - Every step is closed explicitly and the legacy DO OVER loop is replaced
 *     by an indexed array loop.
 *
 * Assumes work.cpi is already ordered by monthid -- TODO confirm.
 */
proc sort data = work.panel_prod;
    by monthid;
run;

data work.fzpizza_panel_cpi;
    merge work.panel_prod (in = in_panel)
          work.cpi;
    by monthid;
    if in_panel;    /* drop CPI months that have no sales records */
run;

proc sql;
    create table work.fzpizza_upc_price as
        select colupc,
               mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
        from work.fzpizza_panel_cpi
        group by colupc
        order by colupc;
quit;

proc univariate data = work.fzpizza_upc_price noprint;
    var avg_price;
    output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;

/* Constant key n = 1 lets the single percentile row be merged onto every record. */
data work.pctl;
    set work.pctl;
    n = 1;
run;

proc sort data = work.fzpizza_dates;
    by colupc;
run;

data work.fzpizza_dates_price;
    merge work.fzpizza_dates     (in = in_sales)
          work.fzpizza_upc_price (in = in_price);
    by colupc;
    if in_sales and in_price;
    n = 1;
run;

data work.fzpizza_dates_price;
    merge work.fzpizza_dates_price work.pctl;
    by n;
    /* A missing price compares as smaller than p25; keep it out of quartile 1. */
    if missing(avg_price) then delete;
    if avg_price < p25 then q = 1;
    else if avg_price < p50 then q = 2;
    else if avg_price < p75 then q = 3;
    else q = 4;
run;

proc sql;
    create table work.fzpizza_count as
        select iri_key,
               qtrid,
               q,
               count(distinct colupc) as fzpizza_count
        from work.fzpizza_dates_price
        group by iri_key, qtrid, q
        order by iri_key, qtrid, q;
quit;

proc transpose data = work.fzpizza_count
               out = work.fzpizza_count
               prefix = fzpizza_count_;
    by iri_key qtrid;
    id q;
    var fzpizza_count;
run;

/* Quartiles absent from a store-quarter come out of the transpose as missing; report them as 0. */
data work.fzpizza_count (drop = _NAME_ _i);
    set work.fzpizza_count;
    array counts {*} _numeric_;
    do _i = 1 to dim(counts);
        if missing(counts{_i}) then counts{_i} = 0;
    end;
run;

proc datasets library = work nolist;
    delete fzpizza fzpizza_dates fzpizza_dates_price pctl fzpizza_panel_cpi;
run;
quit;

*hhclean;

/* hhclean -- raw weekly scanner data.                                        */
/* Eleven yearly files are read with list input; the first line of each file */
/* is skipped (firstobs=2). The Year8 file is read from Year8\ directly,     */
/* without the External\ level used for the other years.                     */
data work.hhclean1;
    infile "J:\IRI Data\Academic Dataset External\Year1\External\hhclean\hhclean_groc_1114_1165" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean2;
    infile "J:\IRI Data\Academic Dataset External\Year2\External\hhclean\hhclean_groc_1166_1217" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean3;
    infile "J:\IRI Data\Academic Dataset External\Year3\External\hhclean\hhclean_groc_1218_1269" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean4;
    infile "J:\IRI Data\Academic Dataset External\Year4\External\hhclean\hhclean_groc_1270_1321" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean5;
    infile "J:\IRI Data\Academic Dataset External\Year5\External\hhclean\hhclean_groc_1322_1373" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean6;
    infile "J:\IRI Data\Academic Dataset External\Year6\External\hhclean\hhclean_groc_1374_1426" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean7;
    infile "J:\IRI Data\Academic Dataset External\Year7\External\hhclean\hhclean_groc_1427_1478" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean8;
    infile "J:\IRI Data\Academic Dataset External\Year8\hhclean\hhclean_groc_1479_1530" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean9;
    infile "J:\IRI Data\Academic Dataset External\Year9\External\hhclean\hhclean_groc_1531_1582" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean10;
    infile "J:\IRI Data\Academic Dataset External\Year10\External\hhclean\hhclean_groc_1583_1634" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hhclean11;
    infile "J:\IRI Data\Academic Dataset External\Year11\External\hhclean\hhclean_groc_1635_1686" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and collapse the four UPC parts into one numeric key. */
data work.hhclean (drop = SY GE VEND ITEM);
    set work.hhclean1-work.hhclean11;
    format colupc best18.;
    colupc = sy * 100000000000
           + ge * 10000000000
           + vend * 100000
           + item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
    delete hhclean1-hhclean11;
run;
quit;

/* Add the variables of perm.dates to every record, matching on week.        */
/* perm.dates is merged as stored -- assumes it is ordered by week, TODO confirm. */
proc sort data = work.hhclean;
    by week;
run;

data work.hhclean_dates (drop = week_start week_end month qtr);
    merge work.hhclean (in = in_sales)
          perm.dates   (in = in_dates);
    by week;
    /* Same filter as "in_sales = in_dates": a merged row always comes from at least one input. */
    if in_sales and in_dates;
run;
/* ===== hhclean: product characteristics =================================== */
/* Three stub workbooks are imported (Sheet1 of each). Each is reduced to    */
/* product, vol_eq and COLUPC, the latter built with the same formula as in  */
/* the scanner data. Below, prod6 is matched to year < 7, prod7 to year = 7  */
/* and prod11 to year > 7. PARENT and VENDOR are assigned but not in the     */
/* KEEP= list.                                                               */
/* NOTE(review): the UPC parts are passed through INPUT(); this presumably   */
/* expects them to be imported as text -- confirm against the workbooks.     */
proc import out = work.prod6
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_hhclean.xls"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq);
    set work.prod6;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod7
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_hhclean.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq);
    set work.prod7;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod11
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_hhclean.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq);
    set work.prod11;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

/* Product tables in UPC order for the merges below. */
proc sort data = work.prod6;
    by colupc;
run;
proc sort data = work.prod7;
    by colupc;
run;
proc sort data = work.prod11;
    by colupc;
run;

/* Split the dated panel by year so each slice meets its own product table. */
data work.panel6 work.panel7 work.panel11;
    set work.hhclean_dates;
    if year < 7 then output work.panel6;
    else if year = 7 then output work.panel7;
    else output work.panel11;
run;

proc sort data = work.panel6;
    by colupc;
run;
proc sort data = work.panel7;
    by colupc;
run;
proc sort data = work.panel11;
    by colupc;
run;

/* Keep only UPCs present in both the panel slice and its product table. */
data work.panel_prod6;
    merge work.panel6 (in = in_sales)
          work.prod6  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod7;
    merge work.panel7 (in = in_sales)
          work.prod7  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod11;
    merge work.panel11 (in = in_sales)
          work.prod11  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod;
    set work.panel_prod6
        work.panel_prod7
        work.panel_prod11;
run;
/* hhclean volume sales: sum of vol_eq * units per iri_key and week,         */
/* computed on the records that matched a product table (work.panel_prod).   */
proc sql;
    create table work.hhclean_vol_sales as
        select iri_key,
               week,
               sum(vol_eq * units) as hhclean_vol_sales
        from work.panel_prod
        group by iri_key, week
        order by iri_key, week;
quit;

/* Remove the per-slice work tables; work.panel_prod is kept. */
proc datasets library = work;
    delete panel_prod6 panel_prod7 panel_prod11
           prod6 prod7 prod11
           panel6 panel7 panel11;
run;
quit;

/* hhclean revenue: sum of dollars per iri_key and week, taken from the      */
/* full work.hhclean table rather than from the matched panel.               */
proc sql;
    create table work.hhclean_revenue as
        select iri_key,
               week,
               sum(dollars) as hhclean
        from work.hhclean
        group by iri_key, week
        order by iri_key, week;
quit;
/*
 * hhclean: number of distinct UPCs per iri_key and qtrid, by price quartile.
 *
 * Steps
 *   1. Join the monthly CPI to work.panel_prod (by monthid) and average, per
 *      UPC, the deflated price per volume-equivalent unit:
 *      (dollars / cpi * 100) / (units * vol_eq).
 *   2. Take the 25th, 50th and 75th percentiles of those UPC averages.
 *   3. Give every record of work.hhclean_dates its UPC's quartile q (1-4)
 *      and count distinct UPCs by iri_key, qtrid and q.
 *   4. Transpose to one row per iri_key/qtrid with columns hhclean_count_<q>
 *      and turn missing counts into zeros.
 *
 * Fixes in this version
 *   - Records whose avg_price is missing are excluded from the quartile
 *     assignment. SAS orders missing values below every number, so the former
 *     test "avg_price < p25" counted such UPCs in quartile 1 although
 *     PROC UNIVARIATE computes the percentiles without them.
 *   - The CPI join keeps only months that occur in work.panel_prod; CPI-only
 *     months used to add rows with a missing colupc.
 *   - Every step is closed explicitly and the legacy DO OVER loop is replaced
 *     by an indexed array loop.
 *
 * Assumes work.cpi is already ordered by monthid -- TODO confirm.
 */
proc sort data = work.panel_prod;
    by monthid;
run;

data work.hhclean_panel_cpi;
    merge work.panel_prod (in = in_panel)
          work.cpi;
    by monthid;
    if in_panel;    /* drop CPI months that have no sales records */
run;

proc sql;
    create table work.hhclean_upc_price as
        select colupc,
               mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
        from work.hhclean_panel_cpi
        group by colupc
        order by colupc;
quit;

proc univariate data = work.hhclean_upc_price noprint;
    var avg_price;
    output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;

/* Constant key n = 1 lets the single percentile row be merged onto every record. */
data work.pctl;
    set work.pctl;
    n = 1;
run;

proc sort data = work.hhclean_dates;
    by colupc;
run;

data work.hhclean_dates_price;
    merge work.hhclean_dates     (in = in_sales)
          work.hhclean_upc_price (in = in_price);
    by colupc;
    if in_sales and in_price;
    n = 1;
run;

data work.hhclean_dates_price;
    merge work.hhclean_dates_price work.pctl;
    by n;
    /* A missing price compares as smaller than p25; keep it out of quartile 1. */
    if missing(avg_price) then delete;
    if avg_price < p25 then q = 1;
    else if avg_price < p50 then q = 2;
    else if avg_price < p75 then q = 3;
    else q = 4;
run;

proc sql;
    create table work.hhclean_count as
        select iri_key,
               qtrid,
               q,
               count(distinct colupc) as hhclean_count
        from work.hhclean_dates_price
        group by iri_key, qtrid, q
        order by iri_key, qtrid, q;
quit;

proc transpose data = work.hhclean_count
               out = work.hhclean_count
               prefix = hhclean_count_;
    by iri_key qtrid;
    id q;
    var hhclean_count;
run;

/* Quartiles absent from a store-quarter come out of the transpose as missing; report them as 0. */
data work.hhclean_count (drop = _NAME_ _i);
    set work.hhclean_count;
    array counts {*} _numeric_;
    do _i = 1 to dim(counts);
        if missing(counts{_i}) then counts{_i} = 0;
    end;
run;

proc datasets library = work nolist;
    delete hhclean hhclean_dates hhclean_dates_price pctl hhclean_panel_cpi;
run;
quit;

*hotdog;

/* hotdog -- raw weekly scanner data.                                         */
/* Eleven yearly files are read with list input; the first line of each file */
/* is skipped (firstobs=2). The Year8 file is read from Year8\ directly,     */
/* without the External\ level used for the other years.                     */
data work.hotdog1;
    infile "J:\IRI Data\Academic Dataset External\Year1\External\hotdog\hotdog_groc_1114_1165" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog2;
    infile "J:\IRI Data\Academic Dataset External\Year2\External\hotdog\hotdog_groc_1166_1217" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog3;
    infile "J:\IRI Data\Academic Dataset External\Year3\External\hotdog\hotdog_groc_1218_1269" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog4;
    infile "J:\IRI Data\Academic Dataset External\Year4\External\hotdog\hotdog_groc_1270_1321" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog5;
    infile "J:\IRI Data\Academic Dataset External\Year5\External\hotdog\hotdog_groc_1322_1373" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog6;
    infile "J:\IRI Data\Academic Dataset External\Year6\External\hotdog\hotdog_groc_1374_1426" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog7;
    infile "J:\IRI Data\Academic Dataset External\Year7\External\hotdog\hotdog_groc_1427_1478" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog8;
    infile "J:\IRI Data\Academic Dataset External\Year8\hotdog\hotdog_groc_1479_1530" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog9;
    infile "J:\IRI Data\Academic Dataset External\Year9\External\hotdog\hotdog_groc_1531_1582" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog10;
    infile "J:\IRI Data\Academic Dataset External\Year10\External\hotdog\hotdog_groc_1583_1634" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.hotdog11;
    infile "J:\IRI Data\Academic Dataset External\Year11\External\hotdog\hotdog_groc_1635_1686" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and collapse the four UPC parts into one numeric key. */
data work.hotdog (drop = SY GE VEND ITEM);
    set work.hotdog1-work.hotdog11;
    format colupc best18.;
    colupc = sy * 100000000000
           + ge * 10000000000
           + vend * 100000
           + item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
    delete hotdog1-hotdog11;
run;
quit;

/* Add the variables of perm.dates to every record, matching on week.        */
/* perm.dates is merged as stored -- assumes it is ordered by week, TODO confirm. */
proc sort data = work.hotdog;
    by week;
run;

data work.hotdog_dates (drop = week_start week_end month qtr);
    merge work.hotdog (in = in_sales)
          perm.dates  (in = in_dates);
    by week;
    /* Same filter as "in_sales = in_dates": a merged row always comes from at least one input. */
    if in_sales and in_dates;
run;
/* ===== hotdog: product characteristics ==================================== */
/* Three stub workbooks are imported (Sheet1 of each). Each is reduced to    */
/* product, vol_eq and COLUPC, the latter built with the same formula as in  */
/* the scanner data. Below, prod6 is matched to year < 7, prod7 to year = 7  */
/* and prod11 to year > 7. PARENT and VENDOR are assigned but not in the     */
/* KEEP= list.                                                               */
/* NOTE(review): the UPC parts are passed through INPUT(); this presumably   */
/* expects them to be imported as text -- confirm against the workbooks.     */
proc import out = work.prod6
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_hotdog.xls"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq);
    set work.prod6;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod7
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_hotdog.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq);
    set work.prod7;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod11
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_hotdog.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq);
    set work.prod11;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

/* Product tables in UPC order for the merges below. */
proc sort data = work.prod6;
    by colupc;
run;
proc sort data = work.prod7;
    by colupc;
run;
proc sort data = work.prod11;
    by colupc;
run;

/* Split the dated panel by year so each slice meets its own product table. */
data work.panel6 work.panel7 work.panel11;
    set work.hotdog_dates;
    if year < 7 then output work.panel6;
    else if year = 7 then output work.panel7;
    else output work.panel11;
run;

proc sort data = work.panel6;
    by colupc;
run;
proc sort data = work.panel7;
    by colupc;
run;
proc sort data = work.panel11;
    by colupc;
run;

/* Keep only UPCs present in both the panel slice and its product table. */
data work.panel_prod6;
    merge work.panel6 (in = in_sales)
          work.prod6  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod7;
    merge work.panel7 (in = in_sales)
          work.prod7  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod11;
    merge work.panel11 (in = in_sales)
          work.prod11  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod;
    set work.panel_prod6
        work.panel_prod7
        work.panel_prod11;
run;

/* Category-named copy of the matched panel. */
data work.hotdog_panel;
    set work.panel_prod;
run;
/* hotdog volume sales: sum of vol_eq * units per iri_key and week,          */
/* computed on the records that matched a product table (work.panel_prod).   */
proc sql;
    create table work.hotdog_vol_sales as
        select iri_key,
               week,
               sum(vol_eq * units) as hotdog_vol_sales
        from work.panel_prod
        group by iri_key, week
        order by iri_key, week;
quit;

/* Remove the per-slice work tables; work.panel_prod is kept. */
proc datasets library = work;
    delete panel_prod6 panel_prod7 panel_prod11
           prod6 prod7 prod11
           panel6 panel7 panel11;
run;
quit;

/* hotdog revenue: sum of dollars per iri_key and week, taken from the       */
/* full work.hotdog table rather than from the matched panel.                */
proc sql;
    create table work.hotdog_revenue as
        select iri_key,
               week,
               sum(dollars) as hotdog
        from work.hotdog
        group by iri_key, week
        order by iri_key, week;
quit;
/*
 * hotdog: number of distinct UPCs per iri_key and qtrid, by price quartile.
 *
 * Steps
 *   1. Join the monthly CPI to work.panel_prod (by monthid) and average, per
 *      UPC, the deflated price per volume-equivalent unit:
 *      (dollars / cpi * 100) / (units * vol_eq).
 *   2. Take the 25th, 50th and 75th percentiles of those UPC averages.
 *   3. Give every record of work.hotdog_dates its UPC's quartile q (1-4)
 *      and count distinct UPCs by iri_key, qtrid and q.
 *   4. Transpose to one row per iri_key/qtrid with columns hotdog_count_<q>
 *      and turn missing counts into zeros.
 *
 * Fixes in this version
 *   - Records whose avg_price is missing are excluded from the quartile
 *     assignment. SAS orders missing values below every number, so the former
 *     test "avg_price < p25" counted such UPCs in quartile 1 although
 *     PROC UNIVARIATE computes the percentiles without them.
 *   - The CPI join keeps only months that occur in work.panel_prod; CPI-only
 *     months used to add rows with a missing colupc.
 *   - Every step is closed explicitly and the legacy DO OVER loop is replaced
 *     by an indexed array loop.
 *
 * Assumes work.cpi is already ordered by monthid -- TODO confirm.
 */
proc sort data = work.panel_prod;
    by monthid;
run;

data work.hotdog_panel_cpi;
    merge work.panel_prod (in = in_panel)
          work.cpi;
    by monthid;
    if in_panel;    /* drop CPI months that have no sales records */
run;

proc sql;
    create table work.hotdog_upc_price as
        select colupc,
               mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
        from work.hotdog_panel_cpi
        group by colupc
        order by colupc;
quit;

proc univariate data = work.hotdog_upc_price noprint;
    var avg_price;
    output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;

/* Constant key n = 1 lets the single percentile row be merged onto every record. */
data work.pctl;
    set work.pctl;
    n = 1;
run;

proc sort data = work.hotdog_dates;
    by colupc;
run;

data work.hotdog_dates_price;
    merge work.hotdog_dates     (in = in_sales)
          work.hotdog_upc_price (in = in_price);
    by colupc;
    if in_sales and in_price;
    n = 1;
run;

data work.hotdog_dates_price;
    merge work.hotdog_dates_price work.pctl;
    by n;
    /* A missing price compares as smaller than p25; keep it out of quartile 1. */
    if missing(avg_price) then delete;
    if avg_price < p25 then q = 1;
    else if avg_price < p50 then q = 2;
    else if avg_price < p75 then q = 3;
    else q = 4;
run;

proc sql;
    create table work.hotdog_count as
        select iri_key,
               qtrid,
               q,
               count(distinct colupc) as hotdog_count
        from work.hotdog_dates_price
        group by iri_key, qtrid, q
        order by iri_key, qtrid, q;
quit;

proc transpose data = work.hotdog_count
               out = work.hotdog_count
               prefix = hotdog_count_;
    by iri_key qtrid;
    id q;
    var hotdog_count;
run;

/* Quartiles absent from a store-quarter come out of the transpose as missing; report them as 0. */
data work.hotdog_count (drop = _NAME_ _i);
    set work.hotdog_count;
    array counts {*} _numeric_;
    do _i = 1 to dim(counts);
        if missing(counts{_i}) then counts{_i} = 0;
    end;
run;

proc datasets library = work nolist;
    delete hotdog hotdog_dates hotdog_dates_price pctl hotdog_panel_cpi;
run;
quit;

*laundet;

/* laundet -- raw weekly scanner data.                                        */
/* Eleven yearly files are read with list input; the first line of each file */
/* is skipped (firstobs=2). The Year8 file is read from Year8\ directly,     */
/* without the External\ level used for the other years.                     */
data work.laundet1;
    infile "J:\IRI Data\Academic Dataset External\Year1\External\laundet\laundet_groc_1114_1165" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet2;
    infile "J:\IRI Data\Academic Dataset External\Year2\External\laundet\laundet_groc_1166_1217" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet3;
    infile "J:\IRI Data\Academic Dataset External\Year3\External\laundet\laundet_groc_1218_1269" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet4;
    infile "J:\IRI Data\Academic Dataset External\Year4\External\laundet\laundet_groc_1270_1321" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet5;
    infile "J:\IRI Data\Academic Dataset External\Year5\External\laundet\laundet_groc_1322_1373" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet6;
    infile "J:\IRI Data\Academic Dataset External\Year6\External\laundet\laundet_groc_1374_1426" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet7;
    infile "J:\IRI Data\Academic Dataset External\Year7\External\laundet\laundet_groc_1427_1478" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet8;
    infile "J:\IRI Data\Academic Dataset External\Year8\laundet\laundet_groc_1479_1530" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet9;
    infile "J:\IRI Data\Academic Dataset External\Year9\External\laundet\laundet_groc_1531_1582" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet10;
    infile "J:\IRI Data\Academic Dataset External\Year10\External\laundet\laundet_groc_1583_1634" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

data work.laundet11;
    infile "J:\IRI Data\Academic Dataset External\Year11\External\laundet\laundet_groc_1635_1686" firstobs=2;
    input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and collapse the four UPC parts into one numeric key. */
data work.laundet (drop = SY GE VEND ITEM);
    set work.laundet1-work.laundet11;
    format colupc best18.;
    colupc = sy * 100000000000
           + ge * 10000000000
           + vend * 100000
           + item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
    delete laundet1-laundet11;
run;
quit;

/* Add the variables of perm.dates to every record, matching on week.        */
/* perm.dates is merged as stored -- assumes it is ordered by week, TODO confirm. */
proc sort data = work.laundet;
    by week;
run;

data work.laundet_dates (drop = week_start week_end month qtr);
    merge work.laundet (in = in_sales)
          perm.dates   (in = in_dates);
    by week;
    /* Same filter as "in_sales = in_dates": a merged row always comes from at least one input. */
    if in_sales and in_dates;
run;
/* ===== laundet: product characteristics =================================== */
/* Three stub workbooks are imported (Sheet1 of each). Each is reduced to    */
/* product, vol_eq and COLUPC, the latter built with the same formula as in  */
/* the scanner data. Below, prod6 is matched to year < 7, prod7 to year = 7  */
/* and prod11 to year > 7. PARENT and VENDOR are assigned but not in the     */
/* KEEP= list.                                                               */
/* NOTE(review): the UPC parts are passed through INPUT(); this presumably   */
/* expects them to be imported as text -- confirm against the workbooks.     */
proc import out = work.prod6
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_laundet.xls"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq);
    set work.prod6;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod7
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_laundet.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq);
    set work.prod7;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

proc import out = work.prod11
    datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_laundet.xlsx"
    dbms = excel replace;
    sheet = "Sheet1$"; getnames = yes; mixed = no;
    scantext = yes; usedate = yes; scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq);
    set work.prod11;
    if L2 = " " then delete;
    PRODUCT = L5;
    PARENT = L3;
    VENDOR = L4;
    SY = input(SY, 2.0);
    GE = input(GE, 4.0);
    VEND = input(VEND, 6.0);
    ITEM = input(ITEM, 6.0);
    format colupc best18.;
    COLUPC = SY * 100000000000 + GE * 10000000000 + VEND * 100000 + ITEM;
run;

/* Product tables in UPC order for the merges below. */
proc sort data = work.prod6;
    by colupc;
run;
proc sort data = work.prod7;
    by colupc;
run;
proc sort data = work.prod11;
    by colupc;
run;

/* Split the dated panel by year so each slice meets its own product table. */
data work.panel6 work.panel7 work.panel11;
    set work.laundet_dates;
    if year < 7 then output work.panel6;
    else if year = 7 then output work.panel7;
    else output work.panel11;
run;

proc sort data = work.panel6;
    by colupc;
run;
proc sort data = work.panel7;
    by colupc;
run;
proc sort data = work.panel11;
    by colupc;
run;

/* Keep only UPCs present in both the panel slice and its product table. */
data work.panel_prod6;
    merge work.panel6 (in = in_sales)
          work.prod6  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod7;
    merge work.panel7 (in = in_sales)
          work.prod7  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod11;
    merge work.panel11 (in = in_sales)
          work.prod11  (in = in_prod);
    by colupc;
    if in_sales and in_prod;
run;

data work.panel_prod;
    set work.panel_prod6
        work.panel_prod7
        work.panel_prod11;
run;
/* laundet volume sales: sum of vol_eq * units per iri_key and week,         */
/* computed on the records that matched a product table (work.panel_prod).   */
proc sql;
    create table work.laundet_vol_sales as
        select iri_key,
               week,
               sum(vol_eq * units) as laundet_vol_sales
        from work.panel_prod
        group by iri_key, week
        order by iri_key, week;
quit;

/* Remove the per-slice work tables; work.panel_prod is kept. */
proc datasets library = work;
    delete panel_prod6 panel_prod7 panel_prod11
           prod6 prod7 prod11
           panel6 panel7 panel11;
run;
quit;

/* laundet revenue: sum of dollars per iri_key and week, taken from the      */
/* full work.laundet table rather than from the matched panel.               */
proc sql;
    create table work.laundet_revenue as
        select iri_key,
               week,
               sum(dollars) as laundet
        from work.laundet
        group by iri_key, week
        order by iri_key, week;
quit;
*Quarterly UPC count;
/* Classifies every laundet UPC into a price quartile and counts distinct UPCs
   per store (iri_key), quarter (qtrid) and quartile.
   Steps: attach CPI by monthid, compute each UPC mean deflated price per
   volume-equivalent unit, take the 25th/50th/75th percentiles across UPCs,
   tag every sales row with its UPC quartile q, count, transpose to one column
   per quartile (laundet_count_<q>) and replace missing counts with 0. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* Keep panel rows only, so CPI months without any sales do not create records
   with a missing colupc.
   NOTE(review): relies on work.cpi already being ordered by monthid - confirm. */
data work.laundet_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;
/* Rows where units * vol_eq is zero or missing yield a missing price and are
   ignored by mean(). A UPC with no usable row gets a missing avg_price. */
proc sql;
	create table work.laundet_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.laundet_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.laundet_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
/* Constant key n lets the single percentile row be merged onto every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.laundet_dates;
	by colupc;
run;
data work.laundet_dates_price;
	merge work.laundet_dates (in = in_sales) work.laundet_upc_price (in = in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
data work.laundet_dates_price;
	merge work.laundet_dates_price work.pctl;
	by n;
	/* A numeric missing value compares lower than any number, so without this
	   guard UPCs that have no computable price would be counted in quartile 1. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.laundet_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as laundet_count
		from work.laundet_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
proc transpose data=work.laundet_count out=work.laundet_count prefix=laundet_count_;
	by iri_key qtrid;
	id q;
	var laundet_count;
run;
/* Quartiles with no UPC in a store-quarter come out of the transpose as
   missing. Every numeric variable that is missing is set to 0. */
data work.laundet_count (drop = _NAME_);
	set work.laundet_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

proc datasets library = work;
	delete laundet laundet_dates laundet_dates_price pctl laundet_panel_cpi;
run;
quit;

*margbutr;

/* margbutr: load the eleven yearly grocery scanner extracts, one WORK table per
   year. Each step starts reading at the second line of its file (firstobs=2),
   with F read as character and every other field as numeric. */
data work.margbutr1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\margbutr\margbutr_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\margbutr\margbutr_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\margbutr\margbutr_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\margbutr\margbutr_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\margbutr\margbutr_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\margbutr\margbutr_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\margbutr\margbutr_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
/* The Year8 path has no External folder level, unlike the other years. */
data work.margbutr8;
	infile "J:\IRI Data\Academic Dataset External\Year8\margbutr\margbutr_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\margbutr\margbutr_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\margbutr\margbutr_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.margbutr11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\margbutr\margbutr_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and build the numeric UPC key. SY, GE, VEND and ITEM
   are dropped from the output once colupc has been formed. */
data work.margbutr (drop = SY GE VEND ITEM);
	set work.margbutr1-work.margbutr11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
run;
proc datasets library = work;
	delete margbutr1-margbutr11;
run;
quit;

proc sort data = work.margbutr;
	by week;
run;
/* Attach the calendar fields from perm.dates by week, keeping only weeks that
   are present in both tables. */
data work.margbutr_dates (drop = week_start week_end month qtr);
	merge work.margbutr (in = in_sales) perm.dates (in = in_dates);
	by week;
	if in_sales and in_dates;
run;
*****PROD CHAR*********;
/* margbutr product attributes: import the three stub workbooks, drop rows whose
   L2 is blank, copy L5/L3/L4 into PRODUCT/PARENT/VENDOR and rebuild the numeric
   UPC key. Only product, colupc and vol_eq survive the keep= list, so PARENT
   and VENDOR are not output.
   NOTE(review): the first workbook is named prod_margbut while the 2007 and
   2008-2011 workbooks use prod_margbutr - confirm the name on disk. */
proc import out = WORK.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_margbut.xls"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

proc import out = WORK.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_margbutr.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

proc import out = WORK.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_margbutr.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

/* Order all three attribute tables by the merge key. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route margbutr sales rows to the attribute vintage that applies to their year
   (below 7, exactly 7, above 7), inner-join each slice to its attribute table
   on colupc, stack the results into work.panel_prod and keep a category copy
   in work.margbutr_panel. */
data work.panel6 work.panel7 work.panel11;
	set work.margbutr_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		otherwise       output work.panel11;
	end;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
data work.margbutr_panel;
	set work.panel_prod;
run;
*Unit sales;
/* Store-week volume: units scaled by vol_eq, summed over the rows that matched
   a product attribute record (work.panel_prod). */
proc sql;
	create table work.margbutr_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as margbutr_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* The per-vintage intermediates are no longer needed. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales, taken from the full scanner table work.margbutr
   rather than from the attribute-matched panel. */
proc sql;
	create table work.margbutr_revenue as
		select iri_key,
		       week,
		       sum(dollars) as margbutr
		from work.margbutr
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Classifies every margbutr UPC into a price quartile and counts distinct UPCs
   per store (iri_key), quarter (qtrid) and quartile.
   Steps: attach CPI by monthid, compute each UPC mean deflated price per
   volume-equivalent unit, take the 25th/50th/75th percentiles across UPCs,
   tag every sales row with its UPC quartile q, count, transpose to one column
   per quartile (margbutr_count_<q>) and replace missing counts with 0. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* Keep panel rows only, so CPI months without any sales do not create records
   with a missing colupc.
   NOTE(review): relies on work.cpi already being ordered by monthid - confirm. */
data work.margbutr_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;
/* Rows where units * vol_eq is zero or missing yield a missing price and are
   ignored by mean(). A UPC with no usable row gets a missing avg_price. */
proc sql;
	create table work.margbutr_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.margbutr_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.margbutr_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
/* Constant key n lets the single percentile row be merged onto every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.margbutr_dates;
	by colupc;
run;
data work.margbutr_dates_price;
	merge work.margbutr_dates (in = in_sales) work.margbutr_upc_price (in = in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
data work.margbutr_dates_price;
	merge work.margbutr_dates_price work.pctl;
	by n;
	/* A numeric missing value compares lower than any number, so without this
	   guard UPCs that have no computable price would be counted in quartile 1. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.margbutr_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as margbutr_count
		from work.margbutr_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
proc transpose data=work.margbutr_count out=work.margbutr_count prefix=margbutr_count_;
	by iri_key qtrid;
	id q;
	var margbutr_count;
run;
/* Quartiles with no UPC in a store-quarter come out of the transpose as
   missing. Every numeric variable that is missing is set to 0. */
data work.margbutr_count (drop = _NAME_);
	set work.margbutr_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

proc datasets library = work;
	delete margbutr margbutr_dates margbutr_dates_price pctl margbutr_panel_cpi;
run;
quit;

*mayo;

/* mayo: load the eleven yearly grocery scanner extracts, one WORK table per
   year. Each step starts reading at the second line of its file (firstobs=2),
   with F read as character and every other field as numeric. */
data work.mayo1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\mayo\mayo_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\mayo\mayo_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\mayo\mayo_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\mayo\mayo_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\mayo\mayo_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\mayo\mayo_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\mayo\mayo_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
/* The Year8 path has no External folder level, unlike the other years. */
data work.mayo8;
	infile "J:\IRI Data\Academic Dataset External\Year8\mayo\mayo_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\mayo\mayo_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\mayo\mayo_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mayo11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\mayo\mayo_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and build the numeric UPC key. SY, GE, VEND and ITEM
   are dropped from the output once colupc has been formed. */
data work.mayo (drop = SY GE VEND ITEM);
	set work.mayo1-work.mayo11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
run;
proc datasets library = work;
	delete mayo1-mayo11;
run;
quit;

proc sort data = work.mayo;
	by week;
run;
/* Attach the calendar fields from perm.dates by week, keeping only weeks that
   are present in both tables. */
data work.mayo_dates (drop = week_start week_end month qtr);
	merge work.mayo (in = in_sales) perm.dates (in = in_dates);
	by week;
	if in_sales and in_dates;
run;
*****PROD CHAR*********;
/* mayo product attributes: import the three stub workbooks, drop rows whose L2
   is blank, copy L5/L3/L4 into PRODUCT/PARENT/VENDOR and rebuild the numeric
   UPC key. Only product, colupc and vol_eq survive the keep= list, so PARENT
   and VENDOR are not output. */
proc import out = WORK.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_mayo.xls"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

proc import out = WORK.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_mayo.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

proc import out = WORK.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_mayo.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

/* Order all three attribute tables by the merge key. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route mayo sales rows to the attribute vintage that applies to their year
   (below 7, exactly 7, above 7), inner-join each slice to its attribute table
   on colupc, stack the results into work.panel_prod and keep a category copy
   in work.mayo_panel. */
data work.panel6 work.panel7 work.panel11;
	set work.mayo_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		otherwise       output work.panel11;
	end;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
data work.mayo_panel;
	set work.panel_prod;
run;
*Unit sales;
/* Store-week volume: units scaled by vol_eq, summed over the rows that matched
   a product attribute record (work.panel_prod). */
proc sql;
	create table work.mayo_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as mayo_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* The per-vintage intermediates are no longer needed. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales, taken from the full scanner table work.mayo rather
   than from the attribute-matched panel. */
proc sql;
	create table work.mayo_revenue as
		select iri_key,
		       week,
		       sum(dollars) as mayo
		from work.mayo
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Classifies every mayo UPC into a price quartile and counts distinct UPCs per
   store (iri_key), quarter (qtrid) and quartile.
   Steps: attach CPI by monthid, compute each UPC mean deflated price per
   volume-equivalent unit, take the 25th/50th/75th percentiles across UPCs,
   tag every sales row with its UPC quartile q, count, transpose to one column
   per quartile (mayo_count_<q>) and replace missing counts with 0. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* Keep panel rows only, so CPI months without any sales do not create records
   with a missing colupc.
   NOTE(review): relies on work.cpi already being ordered by monthid - confirm. */
data work.mayo_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;
/* Rows where units * vol_eq is zero or missing yield a missing price and are
   ignored by mean(). A UPC with no usable row gets a missing avg_price. */
proc sql;
	create table work.mayo_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.mayo_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.mayo_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
/* Constant key n lets the single percentile row be merged onto every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.mayo_dates;
	by colupc;
run;
data work.mayo_dates_price;
	merge work.mayo_dates (in = in_sales) work.mayo_upc_price (in = in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
data work.mayo_dates_price;
	merge work.mayo_dates_price work.pctl;
	by n;
	/* A numeric missing value compares lower than any number, so without this
	   guard UPCs that have no computable price would be counted in quartile 1. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.mayo_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as mayo_count
		from work.mayo_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
proc transpose data=work.mayo_count out=work.mayo_count prefix=mayo_count_;
	by iri_key qtrid;
	id q;
	var mayo_count;
run;
/* Quartiles with no UPC in a store-quarter come out of the transpose as
   missing. Every numeric variable that is missing is set to 0. */
data work.mayo_count (drop = _NAME_);
	set work.mayo_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

proc datasets library = work;
	delete mayo mayo_dates mayo_dates_price pctl mayo_panel_cpi;
run;
quit;

*milk;

/* milk: load the eleven yearly grocery scanner extracts, one WORK table per
   year. Each step starts reading at the second line of its file (firstobs=2),
   with F read as character and every other field as numeric. */
data work.milk1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\milk\milk_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\milk\milk_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\milk\milk_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\milk\milk_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\milk\milk_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\milk\milk_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\milk\milk_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
/* The Year8 path has no External folder level, unlike the other years. */
data work.milk8;
	infile "J:\IRI Data\Academic Dataset External\Year8\milk\milk_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\milk\milk_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\milk\milk_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.milk11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\milk\milk_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and build the numeric UPC key. SY, GE, VEND and ITEM
   are dropped from the output once colupc has been formed. */
data work.milk (drop = SY GE VEND ITEM);
	set work.milk1-work.milk11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
run;
proc datasets library = work;
	delete milk1-milk11;
run;
quit;

proc sort data = work.milk;
	by week;
run;
/* Attach the calendar fields from perm.dates by week, keeping only weeks that
   are present in both tables. */
data work.milk_dates (drop = week_start week_end month qtr);
	merge work.milk (in = in_sales) perm.dates (in = in_dates);
	by week;
	if in_sales and in_dates;
run;
*****PROD CHAR*********;
/* milk product attributes: import the three stub workbooks, drop rows whose L2
   is blank, copy L5/L3/L4 into PRODUCT/PARENT/VENDOR and rebuild the numeric
   UPC key. Only product, colupc and vol_eq survive the keep= list, so PARENT
   and VENDOR are not output. */
proc import out = WORK.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_milk.xls"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

proc import out = WORK.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_milk.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

proc import out = WORK.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_milk.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

/* Order all three attribute tables by the merge key. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route milk sales rows to the attribute vintage that applies to their year
   (below 7, exactly 7, above 7), inner-join each slice to its attribute table
   on colupc, stack the results into work.panel_prod and keep a category copy
   in work.milk_panel. */
data work.panel6 work.panel7 work.panel11;
	set work.milk_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		otherwise       output work.panel11;
	end;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
data work.milk_panel;
	set work.panel_prod;
run;
*Unit sales;
/* Store-week volume: units scaled by vol_eq, summed over the rows that matched
   a product attribute record (work.panel_prod). */
proc sql;
	create table work.milk_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as milk_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* The per-vintage intermediates are no longer needed. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales, taken from the full scanner table work.milk rather
   than from the attribute-matched panel. */
proc sql;
	create table work.milk_revenue as
		select iri_key,
		       week,
		       sum(dollars) as milk
		from work.milk
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Classifies every milk UPC into a price quartile and counts distinct UPCs per
   store (iri_key), quarter (qtrid) and quartile.
   Steps: attach CPI by monthid, compute each UPC mean deflated price per
   volume-equivalent unit, take the 25th/50th/75th percentiles across UPCs,
   tag every sales row with its UPC quartile q, count, transpose to one column
   per quartile (milk_count_<q>) and replace missing counts with 0. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* Keep panel rows only, so CPI months without any sales do not create records
   with a missing colupc.
   NOTE(review): relies on work.cpi already being ordered by monthid - confirm. */
data work.milk_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;
/* Rows where units * vol_eq is zero or missing yield a missing price and are
   ignored by mean(). A UPC with no usable row gets a missing avg_price. */
proc sql;
	create table work.milk_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.milk_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.milk_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
/* Constant key n lets the single percentile row be merged onto every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.milk_dates;
	by colupc;
run;
data work.milk_dates_price;
	merge work.milk_dates (in = in_sales) work.milk_upc_price (in = in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
data work.milk_dates_price;
	merge work.milk_dates_price work.pctl;
	by n;
	/* A numeric missing value compares lower than any number, so without this
	   guard UPCs that have no computable price would be counted in quartile 1. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.milk_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as milk_count
		from work.milk_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
proc transpose data=work.milk_count out=work.milk_count prefix=milk_count_;
	by iri_key qtrid;
	id q;
	var milk_count;
run;
/* Quartiles with no UPC in a store-quarter come out of the transpose as
   missing. Every numeric variable that is missing is set to 0. */
data work.milk_count (drop = _NAME_);
	set work.milk_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

proc datasets library = work;
	delete milk milk_dates milk_dates_price pctl milk_panel_cpi;
run;
quit;

*mustketc;

/* mustketc: load the eleven yearly grocery scanner extracts, one WORK table per
   year. Each step starts reading at the second line of its file (firstobs=2),
   with F read as character and every other field as numeric. */
data work.mustketc1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\mustketc\mustketc_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\mustketc\mustketc_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\mustketc\mustketc_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\mustketc\mustketc_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\mustketc\mustketc_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\mustketc\mustketc_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\mustketc\mustketc_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
/* The Year8 path has no External folder level, unlike the other years. */
data work.mustketc8;
	infile "J:\IRI Data\Academic Dataset External\Year8\mustketc\mustketc_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\mustketc\mustketc_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\mustketc\mustketc_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;
data work.mustketc11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\mustketc\mustketc_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

/* Stack the yearly tables and build the numeric UPC key. SY, GE, VEND and ITEM
   are dropped from the output once colupc has been formed. */
data work.mustketc (drop = SY GE VEND ITEM);
	set work.mustketc1-work.mustketc11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
run;
proc datasets library = work;
	delete mustketc1-mustketc11;
run;
quit;

proc sort data = work.mustketc;
	by week;
run;
/* Attach the calendar fields from perm.dates by week, keeping only weeks that
   are present in both tables. */
data work.mustketc_dates (drop = week_start week_end month qtr);
	merge work.mustketc (in = in_sales) perm.dates (in = in_dates);
	by week;
	if in_sales and in_dates;
run;
*****PROD CHAR*********;
/* mustketc product attributes: import the three stub workbooks, drop rows whose
   L2 is blank, copy L5/L3/L4 into PRODUCT/PARENT/VENDOR and rebuild the numeric
   UPC key. Only product, colupc and vol_eq survive the keep= list, so PARENT
   and VENDOR are not output. */
proc import out = WORK.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_mustketc.xls"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

proc import out = WORK.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_mustketc.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

proc import out = WORK.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_mustketc.xlsx"
	dbms = EXCEL replace;
	sheet = "Sheet1$";
	getnames = YES;
	mixed = NO;
	scantext = YES;
	usedate = YES;
	scantime = YES;
run;
data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if missing(L2) then delete;
	PRODUCT = L5;
	PARENT  = L3;
	VENDOR  = L4;
	SY   = input(SY, 2.);
	GE   = input(GE, 4.);
	VEND = input(VEND, 6.);
	ITEM = input(ITEM, 6.);
	format colupc best18.;
	COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
run;

/* Order all three attribute tables by the merge key. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route mustketc sales rows to the attribute vintage that applies to their year
   (below 7, exactly 7, above 7), inner-join each slice to its attribute table
   on colupc, stack the results into work.panel_prod and keep a category copy
   in work.mustketc_panel. */
data work.panel6 work.panel7 work.panel11;
	set work.mustketc_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		otherwise       output work.panel11;
	end;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
data work.mustketc_panel;
	set work.panel_prod;
run;
*Unit sales;
/* Store-week volume: units scaled by vol_eq, summed over the rows that matched
   a product attribute record (work.panel_prod). */
proc sql;
	create table work.mustketc_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as mustketc_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;
/* The per-vintage intermediates are no longer needed. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;
*Revenue;
/* Store-week dollar sales, taken from the full scanner table work.mustketc
   rather than from the attribute-matched panel. */
proc sql;
	create table work.mustketc_revenue as
		select iri_key,
		       week,
		       sum(dollars) as mustketc
		from work.mustketc
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
/* Classifies every mustketc UPC into a price quartile and counts distinct UPCs
   per store (iri_key), quarter (qtrid) and quartile.
   Steps: attach CPI by monthid, compute each UPC mean deflated price per
   volume-equivalent unit, take the 25th/50th/75th percentiles across UPCs,
   tag every sales row with its UPC quartile q, count, transpose to one column
   per quartile (mustketc_count_<q>) and replace missing counts with 0. */
proc sort data = work.panel_prod;
	by monthid;
run;
/* Keep panel rows only, so CPI months without any sales do not create records
   with a missing colupc.
   NOTE(review): relies on work.cpi already being ordered by monthid - confirm. */
data work.mustketc_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;
/* Rows where units * vol_eq is zero or missing yield a missing price and are
   ignored by mean(). A UPC with no usable row gets a missing avg_price. */
proc sql;
	create table work.mustketc_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.mustketc_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.mustketc_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
/* Constant key n lets the single percentile row be merged onto every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.mustketc_dates;
	by colupc;
run;
data work.mustketc_dates_price;
	merge work.mustketc_dates (in = in_sales) work.mustketc_upc_price (in = in_price);
	by colupc;
	if in_sales and in_price;
	n = 1;
run;
data work.mustketc_dates_price;
	merge work.mustketc_dates_price work.pctl;
	by n;
	/* A numeric missing value compares lower than any number, so without this
	   guard UPCs that have no computable price would be counted in quartile 1. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.mustketc_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as mustketc_count
		from work.mustketc_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
proc transpose data=work.mustketc_count out=work.mustketc_count prefix=mustketc_count_;
	by iri_key qtrid;
	id q;
	var mustketc_count;
run;
/* Quartiles with no UPC in a store-quarter come out of the transpose as
   missing. Every numeric variable that is missing is set to 0. */
data work.mustketc_count (drop = _NAME_);
	set work.mustketc_count;
	array nums {*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

proc datasets library = work;
	delete mustketc mustketc_dates mustketc_dates_price pctl mustketc_panel_cpi;
run;
quit;

*paptowl;

/* paptowl: read the eleven yearly grocery files into work.paptowl1 - work.paptowl11.
   All files share one layout; the first line is skipped (firstobs=2) and
   F is read as character, every other field as numeric. */
%macro read_paptowl(yr, relpath);
	data work.paptowl&yr;
		infile "J:\IRI Data\Academic Dataset External\&relpath" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
	run;
%mend read_paptowl;

/* The folder layout is not uniform: Year8 has no External level and
   years 8-11 use the folder name paptowls. */
%read_paptowl(1,  Year1\External\paptowl\paptowl_groc_1114_1165)
%read_paptowl(2,  Year2\External\paptowl\paptowl_groc_1166_1217)
%read_paptowl(3,  Year3\External\paptowl\paptowl_groc_1218_1269)
%read_paptowl(4,  Year4\External\paptowl\paptowl_groc_1270_1321)
%read_paptowl(5,  Year5\External\paptowl\paptowl_groc_1322_1373)
%read_paptowl(6,  Year6\External\paptowl\paptowl_groc_1374_1426)
%read_paptowl(7,  Year7\External\paptowl\paptowl_groc_1427_1478)
%read_paptowl(8,  Year8\paptowls\paptowl_groc_1479_1530)
%read_paptowl(9,  Year9\External\paptowls\paptowl_groc_1531_1582)
%read_paptowl(10, Year10\External\paptowls\paptowl_groc_1583_1634)
%read_paptowl(11, Year11\External\paptowls\paptowl_groc_1635_1686)

/* Stack the yearly files and build one numeric UPC key (colupc) from SY, GE, VEND and ITEM;
   the four components are dropped afterwards. */
data work.paptowl (drop = SY GE VEND ITEM);
	set work.paptowl1-work.paptowl11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete paptowl1-paptowl11;
run;
quit;

/* Attach the calendar variables of perm.dates by week;
   only weeks found in both inputs are kept. */
proc sort data = work.paptowl;
	by week;
run;
data work.paptowl_dates (drop = week_start week_end month qtr);
	merge work.paptowl (in = in_sales) perm.dates (in = in_cal);
	by week;
	if in_sales and in_cal;
run;
/***** PROD CHAR *****/
/* Import one product stub workbook (Sheet1) and reduce it to product, colupc and vol_eq.
   Rows with a blank L2 are discarded. PARENT and VENDOR are assigned but not kept.
   NOTE(review): SY/GE/VEND/ITEM are overwritten with the result of INPUT(), so these
   statements depend on SAS implicit type conversion - confirm the imported column types. */
%macro stub_paptowl(out, relfile);
	proc import out = &out
		datafile = "J:\IRI Data\Academic Dataset External\&relfile"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	data &out (keep = product colupc vol_eq);
		set &out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0);
		GE = input(GE,4.0);
		VEND = input(VEND,6.0);
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
%mend stub_paptowl;

%stub_paptowl(WORK.prod6,  parsed stub files\prod_ptowels.xls)
%stub_paptowl(WORK.prod7,  parsed stub files 2007\prod_ptowels.xlsx)
%stub_paptowl(WORK.prod11, parsed stub files 2008-2011\prod11_paprtowls.xlsx)

/* Order the three product files by UPC ahead of the match-merges below. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route each sales row to the panel paired with its product file:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11.
   A missing year compares below 7 in SAS and therefore lands in panel6. */
data work.panel6 work.panel7 work.panel11;
	set work.paptowl_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;

proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

/* Inner match of each panel with its product file: only UPCs present in both are kept. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched pieces into a single panel. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
/* Volume sales: sum of vol_eq * units per store (iri_key) and week over the matched panel. */
proc sql;
	create table work.paptowl_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as paptowl_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the intermediate pieces; work.panel_prod itself is still used further down. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Revenue: sum of dollars per store and week, taken from work.paptowl
   (all rows, not only those matched to a product file). */
proc sql;
	create table work.paptowl_revenue as
		select iri_key,
		       week,
		       sum(dollars) as paptowl
		from work.paptowl
		group by iri_key, week
		order by iri_key, week;
quit;
/* Quarterly UPC count, part 1: assign every UPC to a price quartile (q = 1..4). */

/* Attach the CPI value of each month to the matched panel.
   Only panel rows are kept, so CPI months without sales do not create empty rows. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.paptowl_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;

/* Per-UPC mean of the CPI-deflated price per volume unit:
   (dollars / cpi * 100) / (units * vol_eq). */
proc sql;
	create table work.paptowl_upc_price as
		select colupc,
		       mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
		from work.paptowl_panel_cpi
		group by colupc
		order by colupc;
quit;

/* Quartile cut points of the UPC-level prices (missing prices are ignored by the procedure). */
proc univariate data = work.paptowl_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;

/* n = 1 is a constant key used to attach the single cut-point row to every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;

proc sort data = work.paptowl_dates;
	by colupc;
run;

/* Keep sales rows whose UPC has a price record and add the constant key. */
data work.paptowl_dates_price;
	merge work.paptowl_dates (in = in1) work.paptowl_upc_price (in = in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;

/* Classify each row by the quartile of its UPC price.
   A missing avg_price compares lower than any number in SAS and would otherwise be
   counted in quartile 1, so UPCs without a computable price are removed first. */
data work.paptowl_dates_price;
	merge work.paptowl_dates_price work.pctl;
	by n;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Number of distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
	create table work.paptowl_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as paptowl_count
		from work.paptowl_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;

/* Long to wide: one row per store-quarter, one column paptowl_count_<q> per quartile. */
proc transpose data = work.paptowl_count
               out = work.paptowl_count
               prefix = paptowl_count_;
	by iri_key qtrid;
	id q;
	var paptowl_count;
run;

/* Replace missing values with 0 in every numeric column (this includes iri_key and qtrid). */
data work.paptowl_count (drop = _NAME_ _k);
	set work.paptowl_count;
	array num_cols {*} _numeric_;
	do _k = 1 to dim(num_cols);
		if num_cols{_k} = . then num_cols{_k} = 0;
	end;
run;

/* Remove the category's working data sets. */
proc datasets library = work;
	delete paptowl paptowl_dates paptowl_dates_price pctl paptowl_panel_cpi;
run;
quit;

*peanbutr;

/* peanbutr: read the eleven yearly grocery files into work.peanbutr1 - work.peanbutr11.
   All files share one layout; the first line is skipped (firstobs=2) and
   F is read as character, every other field as numeric. */
%macro read_peanbutr(yr, relpath);
	data work.peanbutr&yr;
		infile "J:\IRI Data\Academic Dataset External\&relpath" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
	run;
%mend read_peanbutr;

/* The Year8 path has no External level, unlike the other years. */
%read_peanbutr(1,  Year1\External\peanbutr\peanbutr_groc_1114_1165)
%read_peanbutr(2,  Year2\External\peanbutr\peanbutr_groc_1166_1217)
%read_peanbutr(3,  Year3\External\peanbutr\peanbutr_groc_1218_1269)
%read_peanbutr(4,  Year4\External\peanbutr\peanbutr_groc_1270_1321)
%read_peanbutr(5,  Year5\External\peanbutr\peanbutr_groc_1322_1373)
%read_peanbutr(6,  Year6\External\peanbutr\peanbutr_groc_1374_1426)
%read_peanbutr(7,  Year7\External\peanbutr\peanbutr_groc_1427_1478)
%read_peanbutr(8,  Year8\peanbutr\peanbutr_groc_1479_1530)
%read_peanbutr(9,  Year9\External\peanbutr\peanbutr_groc_1531_1582)
%read_peanbutr(10, Year10\External\peanbutr\peanbutr_groc_1583_1634)
%read_peanbutr(11, Year11\External\peanbutr\peanbutr_groc_1635_1686)

/* Stack the yearly files and build one numeric UPC key (colupc) from SY, GE, VEND and ITEM;
   the four components are dropped afterwards. */
data work.peanbutr (drop = SY GE VEND ITEM);
	set work.peanbutr1-work.peanbutr11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete peanbutr1-peanbutr11;
run;
quit;

/* Attach the calendar variables of perm.dates by week;
   only weeks found in both inputs are kept. */
proc sort data = work.peanbutr;
	by week;
run;
data work.peanbutr_dates (drop = week_start week_end month qtr);
	merge work.peanbutr (in = in_sales) perm.dates (in = in_cal);
	by week;
	if in_sales and in_cal;
run;
/***** PROD CHAR *****/
/* Import one product stub workbook (Sheet1) and reduce it to product, colupc, vol_eq and process.
   Rows with a blank L2 are discarded. PARENT and VENDOR are assigned but not kept.
   NOTE(review): SY/GE/VEND/ITEM are overwritten with the result of INPUT(), so these
   statements depend on SAS implicit type conversion - confirm the imported column types. */
%macro stub_peanbutr(out, relfile);
	proc import out = &out
		datafile = "J:\IRI Data\Academic Dataset External\&relfile"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	data &out (keep = product colupc vol_eq process);
		set &out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0);
		GE = input(GE,4.0);
		VEND = input(VEND,6.0);
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
%mend stub_peanbutr;

%stub_peanbutr(WORK.prod6,  parsed stub files\prod_peanbutr.xls)
%stub_peanbutr(WORK.prod7,  parsed stub files 2007\prod_pbutr.xlsx)
%stub_peanbutr(WORK.prod11, parsed stub files 2008-2011\prod11_peanbutr.xlsx)

/* Order the three product files by UPC ahead of the match-merges below. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route each sales row to the panel paired with its product file:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11.
   A missing year compares below 7 in SAS and therefore lands in panel6. */
data work.panel6 work.panel7 work.panel11;
	set work.peanbutr_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;

proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

/* Inner match of each panel with its product file: only UPCs present in both are kept. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched pieces into a single panel. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;

/* Category-named copy of the matched panel. */
data work.peanbutr_panel;
	set work.panel_prod;
run;
/* Volume sales: sum of vol_eq * units per store (iri_key) and week over the matched panel. */
proc sql;
	create table work.peanbutr_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as peanbutr_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the intermediate pieces; work.panel_prod itself is still used further down. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Revenue: sum of dollars per store and week, taken from work.peanbutr
   (all rows, not only those matched to a product file). */
proc sql;
	create table work.peanbutr_revenue as
		select iri_key,
		       week,
		       sum(dollars) as peanbutr
		from work.peanbutr
		group by iri_key, week
		order by iri_key, week;
quit;
/* Quarterly UPC count, part 1: assign every UPC to a price quartile (q = 1..4). */

/* Attach the CPI value of each month to the matched panel.
   Only panel rows are kept, so CPI months without sales do not create empty rows. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.peanbutr_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;

/* Per-UPC mean of the CPI-deflated price per volume unit:
   (dollars / cpi * 100) / (units * vol_eq). */
proc sql;
	create table work.peanbutr_upc_price as
		select colupc,
		       mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
		from work.peanbutr_panel_cpi
		group by colupc
		order by colupc;
quit;

/* Quartile cut points of the UPC-level prices (missing prices are ignored by the procedure). */
proc univariate data = work.peanbutr_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;

/* n = 1 is a constant key used to attach the single cut-point row to every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;

proc sort data = work.peanbutr_dates;
	by colupc;
run;

/* Keep sales rows whose UPC has a price record and add the constant key. */
data work.peanbutr_dates_price;
	merge work.peanbutr_dates (in = in1) work.peanbutr_upc_price (in = in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;

/* Classify each row by the quartile of its UPC price.
   A missing avg_price compares lower than any number in SAS and would otherwise be
   counted in quartile 1, so UPCs without a computable price are removed first. */
data work.peanbutr_dates_price;
	merge work.peanbutr_dates_price work.pctl;
	by n;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Number of distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
	create table work.peanbutr_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as peanbutr_count
		from work.peanbutr_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;

/* Long to wide: one row per store-quarter, one column peanbutr_count_<q> per quartile. */
proc transpose data = work.peanbutr_count
               out = work.peanbutr_count
               prefix = peanbutr_count_;
	by iri_key qtrid;
	id q;
	var peanbutr_count;
run;

/* Replace missing values with 0 in every numeric column (this includes iri_key and qtrid). */
data work.peanbutr_count (drop = _NAME_ _k);
	set work.peanbutr_count;
	array num_cols {*} _numeric_;
	do _k = 1 to dim(num_cols);
		if num_cols{_k} = . then num_cols{_k} = 0;
	end;
run;

/* Remove the category's working data sets. */
proc datasets library = work;
	delete peanbutr peanbutr_dates peanbutr_dates_price pctl peanbutr_panel_cpi;
run;
quit;

*photo;

/* photo: read the eleven yearly grocery files into work.photo1 - work.photo11.
   All files share one layout; the first line is skipped (firstobs=2) and
   F is read as character, every other field as numeric. */
%macro read_photo(yr, relpath);
	data work.photo&yr;
		infile "J:\IRI Data\Academic Dataset External\&relpath" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
	run;
%mend read_photo;

/* The Year8 path has no External level, unlike the other years. */
%read_photo(1,  Year1\External\photo\photo_groc_1114_1165)
%read_photo(2,  Year2\External\photo\photo_groc_1166_1217)
%read_photo(3,  Year3\External\photo\photo_groc_1218_1269)
%read_photo(4,  Year4\External\photo\photo_groc_1270_1321)
%read_photo(5,  Year5\External\photo\photo_groc_1322_1373)
%read_photo(6,  Year6\External\photo\photo_groc_1374_1426)
%read_photo(7,  Year7\External\photo\photo_groc_1427_1478)
%read_photo(8,  Year8\photo\photo_groc_1479_1530)
%read_photo(9,  Year9\External\photo\photo_groc_1531_1582)
%read_photo(10, Year10\External\photo\photo_groc_1583_1634)
%read_photo(11, Year11\External\photo\photo_groc_1635_1686)

/* Stack the yearly files and build one numeric UPC key (colupc) from SY, GE, VEND and ITEM;
   the four components are dropped afterwards. */
data work.photo (drop = SY GE VEND ITEM);
	set work.photo1-work.photo11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete photo1-photo11;
run;
quit;

/* Attach the calendar variables of perm.dates by week;
   only weeks found in both inputs are kept. */
proc sort data = work.photo;
	by week;
run;
data work.photo_dates (drop = week_start week_end month qtr);
	merge work.photo (in = in_sales) perm.dates (in = in_cal);
	by week;
	if in_sales and in_cal;
run;
/***** PROD CHAR *****/
/* Import one product stub workbook (Sheet1) and reduce it to product, colupc and vol_eq.
   Rows with a blank L2 are discarded. PARENT and VENDOR are assigned but not kept.
   NOTE(review): SY/GE/VEND/ITEM are overwritten with the result of INPUT(), so these
   statements depend on SAS implicit type conversion - confirm the imported column types. */
%macro stub_photo(out, relfile);
	proc import out = &out
		datafile = "J:\IRI Data\Academic Dataset External\&relfile"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	data &out (keep = product colupc vol_eq);
		set &out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0);
		GE = input(GE,4.0);
		VEND = input(VEND,6.0);
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
%mend stub_photo;

%stub_photo(WORK.prod6,  parsed stub files\prod_photo.xls)
%stub_photo(WORK.prod7,  parsed stub files 2007\prod_photo.xlsx)
%stub_photo(WORK.prod11, parsed stub files 2008-2011\prod11_photo.xlsx)

/* Order the three product files by UPC ahead of the match-merges below. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route each sales row to the panel paired with its product file:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11.
   A missing year compares below 7 in SAS and therefore lands in panel6. */
data work.panel6 work.panel7 work.panel11;
	set work.photo_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;

proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

/* Inner match of each panel with its product file: only UPCs present in both are kept. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched pieces into a single panel. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
/* Volume sales: sum of vol_eq * units per store (iri_key) and week over the matched panel. */
proc sql;
	create table work.photo_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as photo_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the intermediate pieces; work.panel_prod itself is still used further down. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Revenue: sum of dollars per store and week, taken from work.photo
   (all rows, not only those matched to a product file). */
proc sql;
	create table work.photo_revenue as
		select iri_key,
		       week,
		       sum(dollars) as photo
		from work.photo
		group by iri_key, week
		order by iri_key, week;
quit;
/* Quarterly UPC count, part 1: assign every UPC to a price quartile (q = 1..4). */

/* Attach the CPI value of each month to the matched panel.
   Only panel rows are kept, so CPI months without sales do not create empty rows. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.photo_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;

/* Per-UPC mean of the CPI-deflated price per volume unit:
   (dollars / cpi * 100) / (units * vol_eq). */
proc sql;
	create table work.photo_upc_price as
		select colupc,
		       mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
		from work.photo_panel_cpi
		group by colupc
		order by colupc;
quit;

/* Quartile cut points of the UPC-level prices (missing prices are ignored by the procedure). */
proc univariate data = work.photo_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;

/* n = 1 is a constant key used to attach the single cut-point row to every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;

proc sort data = work.photo_dates;
	by colupc;
run;

/* Keep sales rows whose UPC has a price record and add the constant key. */
data work.photo_dates_price;
	merge work.photo_dates (in = in1) work.photo_upc_price (in = in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;

/* Classify each row by the quartile of its UPC price.
   A missing avg_price compares lower than any number in SAS and would otherwise be
   counted in quartile 1, so UPCs without a computable price are removed first. */
data work.photo_dates_price;
	merge work.photo_dates_price work.pctl;
	by n;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Number of distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
	create table work.photo_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as photo_count
		from work.photo_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;

/* Long to wide: one row per store-quarter, one column photo_count_<q> per quartile. */
proc transpose data = work.photo_count
               out = work.photo_count
               prefix = photo_count_;
	by iri_key qtrid;
	id q;
	var photo_count;
run;

/* Replace missing values with 0 in every numeric column (this includes iri_key and qtrid). */
data work.photo_count (drop = _NAME_ _k);
	set work.photo_count;
	array num_cols {*} _numeric_;
	do _k = 1 to dim(num_cols);
		if num_cols{_k} = . then num_cols{_k} = 0;
	end;
run;

/* Remove the category's working data sets. */
proc datasets library = work;
	delete photo photo_dates photo_dates_price pctl photo_panel_cpi;
run;
quit;
*razors;

/* razors: read the eleven yearly grocery files into work.razors1 - work.razors11.
   All files share one layout; the first line is skipped (firstobs=2) and
   F is read as character, every other field as numeric. */
%macro read_razors(yr, relpath);
	data work.razors&yr;
		infile "J:\IRI Data\Academic Dataset External\&relpath" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
	run;
%mend read_razors;

/* The Year8 path has no External level, unlike the other years. */
%read_razors(1,  Year1\External\razors\razors_groc_1114_1165)
%read_razors(2,  Year2\External\razors\razors_groc_1166_1217)
%read_razors(3,  Year3\External\razors\razors_groc_1218_1269)
%read_razors(4,  Year4\External\razors\razors_groc_1270_1321)
%read_razors(5,  Year5\External\razors\razors_groc_1322_1373)
%read_razors(6,  Year6\External\razors\razors_groc_1374_1426)
%read_razors(7,  Year7\External\razors\razors_groc_1427_1478)
%read_razors(8,  Year8\razors\razors_groc_1479_1530)
%read_razors(9,  Year9\External\razors\razors_groc_1531_1582)
%read_razors(10, Year10\External\razors\razors_groc_1583_1634)
%read_razors(11, Year11\External\razors\razors_groc_1635_1686)

/* Stack the yearly files and build one numeric UPC key (colupc) from SY, GE, VEND and ITEM;
   the four components are dropped afterwards. */
data work.razors (drop = SY GE VEND ITEM);
	set work.razors1-work.razors11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete razors1-razors11;
run;
quit;

/* Attach the calendar variables of perm.dates by week;
   only weeks found in both inputs are kept. */
proc sort data = work.razors;
	by week;
run;
data work.razors_dates (drop = week_start week_end month qtr);
	merge work.razors (in = in_sales) perm.dates (in = in_cal);
	by week;
	if in_sales and in_cal;
run;
/***** PROD CHAR *****/
/* Import one product stub workbook (Sheet1) and reduce it to product, colupc and vol_eq.
   Rows with a blank L2 are discarded. PARENT and VENDOR are assigned but not kept.
   NOTE(review): SY/GE/VEND/ITEM are overwritten with the result of INPUT(), so these
   statements depend on SAS implicit type conversion - confirm the imported column types. */
%macro stub_razors(out, relfile);
	proc import out = &out
		datafile = "J:\IRI Data\Academic Dataset External\&relfile"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	data &out (keep = product colupc vol_eq);
		set &out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0);
		GE = input(GE,4.0);
		VEND = input(VEND,6.0);
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
%mend stub_razors;

%stub_razors(WORK.prod6,  parsed stub files\prod_razors.xls)
%stub_razors(WORK.prod7,  parsed stub files 2007\prod_razors.xlsx)
%stub_razors(WORK.prod11, parsed stub files 2008-2011\prod11_razors.xlsx)

/* Order the three product files by UPC ahead of the match-merges below. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route each sales row to the panel paired with its product file:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11.
   A missing year compares below 7 in SAS and therefore lands in panel6. */
data work.panel6 work.panel7 work.panel11;
	set work.razors_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;

proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

/* Inner match of each panel with its product file: only UPCs present in both are kept. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched pieces into a single panel. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
/* Volume sales: sum of vol_eq * units per store (iri_key) and week over the matched panel. */
proc sql;
	create table work.razors_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as razors_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the intermediate pieces; work.panel_prod itself is still used further down. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Revenue: sum of dollars per store and week, taken from work.razors
   (all rows, not only those matched to a product file). */
proc sql;
	create table work.razors_revenue as
		select iri_key,
		       week,
		       sum(dollars) as razors
		from work.razors
		group by iri_key, week
		order by iri_key, week;
quit;
/* Quarterly UPC count, part 1: assign every UPC to a price quartile (q = 1..4). */

/* Attach the CPI value of each month to the matched panel.
   Only panel rows are kept, so CPI months without sales do not create empty rows. */
proc sort data = work.panel_prod;
	by monthid;
run;
data work.razors_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;

/* Per-UPC mean of the CPI-deflated price per volume unit:
   (dollars / cpi * 100) / (units * vol_eq). */
proc sql;
	create table work.razors_upc_price as
		select colupc,
		       mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
		from work.razors_panel_cpi
		group by colupc
		order by colupc;
quit;

/* Quartile cut points of the UPC-level prices (missing prices are ignored by the procedure). */
proc univariate data = work.razors_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;

/* n = 1 is a constant key used to attach the single cut-point row to every sales row. */
data work.pctl;
	set work.pctl;
	n = 1;
run;

proc sort data = work.razors_dates;
	by colupc;
run;

/* Keep sales rows whose UPC has a price record and add the constant key. */
data work.razors_dates_price;
	merge work.razors_dates (in = in1) work.razors_upc_price (in = in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;

/* Classify each row by the quartile of its UPC price.
   A missing avg_price compares lower than any number in SAS and would otherwise be
   counted in quartile 1, so UPCs without a computable price are removed first. */
data work.razors_dates_price;
	merge work.razors_dates_price work.pctl;
	by n;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
/* Number of distinct UPCs per store (iri_key), quarter (qtrid) and price quartile (q). */
proc sql;
	create table work.razors_count as
		select iri_key,
		       qtrid,
		       q,
		       count(distinct colupc) as razors_count
		from work.razors_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;

/* Long to wide: one row per store-quarter, one column razors_count_<q> per quartile. */
proc transpose data = work.razors_count
               out = work.razors_count
               prefix = razors_count_;
	by iri_key qtrid;
	id q;
	var razors_count;
run;

/* Replace missing values with 0 in every numeric column (this includes iri_key and qtrid). */
data work.razors_count (drop = _NAME_ _k);
	set work.razors_count;
	array num_cols {*} _numeric_;
	do _k = 1 to dim(num_cols);
		if num_cols{_k} = . then num_cols{_k} = 0;
	end;
run;

/* Remove the category's working data sets. */
proc datasets library = work;
	delete razors razors_dates razors_dates_price pctl razors_panel_cpi;
run;
quit;

*saltsnck;

/* saltsnck: read the eleven yearly grocery files into work.saltsnck1 - work.saltsnck11.
   All files share one layout; the first line is skipped (firstobs=2) and
   F is read as character, every other field as numeric. */
%macro read_saltsnck(yr, relpath);
	data work.saltsnck&yr;
		infile "J:\IRI Data\Academic Dataset External\&relpath" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS  F $ D PR;
	run;
%mend read_saltsnck;

/* The Year8 path has no External level, unlike the other years. */
%read_saltsnck(1,  Year1\External\saltsnck\saltsnck_groc_1114_1165)
%read_saltsnck(2,  Year2\External\saltsnck\saltsnck_groc_1166_1217)
%read_saltsnck(3,  Year3\External\saltsnck\saltsnck_groc_1218_1269)
%read_saltsnck(4,  Year4\External\saltsnck\saltsnck_groc_1270_1321)
%read_saltsnck(5,  Year5\External\saltsnck\saltsnck_groc_1322_1373)
%read_saltsnck(6,  Year6\External\saltsnck\saltsnck_groc_1374_1426)
%read_saltsnck(7,  Year7\External\saltsnck\saltsnck_groc_1427_1478)
%read_saltsnck(8,  Year8\saltsnck\saltsnck_groc_1479_1530)
%read_saltsnck(9,  Year9\External\saltsnck\saltsnck_groc_1531_1582)
%read_saltsnck(10, Year10\External\saltsnck\saltsnck_groc_1583_1634)
%read_saltsnck(11, Year11\External\saltsnck\saltsnck_groc_1635_1686)

/* Stack the yearly files and build one numeric UPC key (colupc) from SY, GE, VEND and ITEM;
   the four components are dropped afterwards. */
data work.saltsnck (drop = SY GE VEND ITEM);
	set work.saltsnck1-work.saltsnck11;
	format colupc best18.;
	colupc = sy*100000000000+ge*10000000000+vend*100000+item;
run;

/* The yearly pieces are no longer needed once stacked. */
proc datasets library = work;
	delete saltsnck1-saltsnck11;
run;
quit;

/* Attach the calendar variables of perm.dates by week;
   only weeks found in both inputs are kept. */
proc sort data = work.saltsnck;
	by week;
run;
data work.saltsnck_dates (drop = week_start week_end month qtr);
	merge work.saltsnck (in = in_sales) perm.dates (in = in_cal);
	by week;
	if in_sales and in_cal;
run;
/***** PROD CHAR *****/
/* Import one product stub workbook (Sheet1) and reduce it to product, colupc and vol_eq.
   Rows with a blank L2 are discarded. PARENT and VENDOR are assigned but not kept.
   NOTE(review): SY/GE/VEND/ITEM are overwritten with the result of INPUT(), so these
   statements depend on SAS implicit type conversion - confirm the imported column types. */
%macro stub_saltsnck(out, relfile);
	proc import out = &out
		datafile = "J:\IRI Data\Academic Dataset External\&relfile"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	data &out (keep = product colupc vol_eq);
		set &out;
		IF L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		SY = input(SY,2.0);
		GE = input(GE,4.0);
		VEND = input(VEND,6.0);
		ITEM = input(ITEM,6.0);
		format colupc best18.;
		COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	run;
%mend stub_saltsnck;

%stub_saltsnck(WORK.prod6,  parsed stub files\prod_saltsnck.xls)
%stub_saltsnck(WORK.prod7,  parsed stub files 2007\prod_saltsnck.xlsx)
%stub_saltsnck(WORK.prod11, parsed stub files 2008-2011\prod11_saltsnck.xlsx)

/* Order the three product files by UPC ahead of the match-merges below. */
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

/* Route each sales row to the panel paired with its product file:
   year < 7 -> panel6, year = 7 -> panel7, year > 7 -> panel11.
   A missing year compares below 7 in SAS and therefore lands in panel6. */
data work.panel6 work.panel7 work.panel11;
	set work.saltsnck_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else output work.panel11;
run;

proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

/* Inner match of each panel with its product file: only UPCs present in both are kept. */
data work.panel_prod6;
	merge work.panel6 (in = in_panel) work.prod6 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod7;
	merge work.panel7 (in = in_panel) work.prod7 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;
data work.panel_prod11;
	merge work.panel11 (in = in_panel) work.prod11 (in = in_prod);
	by colupc;
	if in_panel and in_prod;
run;

/* Stack the three matched pieces into a single panel. */
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;

/* Category-named copy of the matched panel. */
data work.saltsnck_panel;
	set work.panel_prod;
run;
/* Volume sales: sum of vol_eq * units per store (iri_key) and week over the matched panel. */
proc sql;
	create table work.saltsnck_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as saltsnck_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

/* Remove the intermediate pieces; work.panel_prod itself is still used further down. */
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

/* Revenue: sum of dollars per store and week, taken from work.saltsnck
   (all rows, not only those matched to a product file). */
proc sql;
	create table work.saltsnck_revenue as
		select iri_key,
		       week,
		       sum(dollars) as saltsnck
		from work.saltsnck
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count by price quartile;
* Steps - deflate each panel price by CPI, average it per UPC, cut the UPC
* averages at their 25th, 50th and 75th percentiles, then count distinct
* UPCs per iri_key, qtrid and quartile;
proc sort data = work.panel_prod;
	by monthid;
run;
* Only panel rows are kept. Without the in= filter every CPI month that has
* no sales would add an observation whose colupc is missing.
* NOTE(review) - the merge presumes work.cpi is already ordered by monthid. Confirm;
data work.saltsnck_panel_cpi;
	merge work.panel_prod (in = has_sale) work.cpi;
	by monthid;
	if has_sale;
run;
proc sql;
	create table work.saltsnck_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.saltsnck_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.saltsnck_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
proc sort data = work.saltsnck_dates;
	by colupc;
run;
* The one-row percentile table is read once and its values are retained.
* A missing avg_price compares lower than any number in SAS, so it has to
* be screened out before the comparisons or it would land in quartile 1.
* UPCs without a usable average price are left out of the counts;
data work.saltsnck_dates_price;
	if _n_ = 1 then set work.pctl;
	merge work.saltsnck_dates (in = has_sale) work.saltsnck_upc_price (in = has_price);
	by colupc;
	if has_sale and has_price;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.saltsnck_count as
		select iri_key, qtrid, q, count(distinct colupc) as saltsnck_count
		from work.saltsnck_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
* One row per iri_key and qtrid, one column per quartile value, named
* saltsnck_count_ followed by the quartile number;
proc transpose data = work.saltsnck_count out = work.saltsnck_count prefix = saltsnck_count_;
	by iri_key qtrid;
	id q;
	var saltsnck_count;
run;
* Every missing numeric value becomes 0. The array spans all numeric
* columns, so iri_key and qtrid are covered as well as the counts;
data work.saltsnck_count (drop = _NAME_ idx);
	set work.saltsnck_count;
	array counts {*} _numeric_;
	do idx = 1 to dim(counts);
		if counts{idx} = . then counts{idx} = 0;
	end;
run;

proc datasets library = work;
	delete saltsnck saltsnck_dates saltsnck_dates_price pctl saltsnck_panel_cpi;
run;
quit;

*shamp;

* Read the eleven shamp scanner files, one per YearN folder. Every file is
* read with list input and its first line is skipped.
* NOTE(review) - only the Year1 path ends in .csv and only the Year8 path
* has no External folder. Confirm both against the files on disk;
%macro read_groc(out=, file=);
	data &out;
		infile "&file" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
	run;
%mend read_groc;

%read_groc(out=work.shamp1,  file=J:\IRI Data\Academic Dataset External\Year1\External\shamp\shamp_groc_1114_1165.csv)
%read_groc(out=work.shamp2,  file=J:\IRI Data\Academic Dataset External\Year2\External\shamp\shamp_groc_1166_1217)
%read_groc(out=work.shamp3,  file=J:\IRI Data\Academic Dataset External\Year3\External\shamp\shamp_groc_1218_1269)
%read_groc(out=work.shamp4,  file=J:\IRI Data\Academic Dataset External\Year4\External\shamp\shamp_groc_1270_1321)
%read_groc(out=work.shamp5,  file=J:\IRI Data\Academic Dataset External\Year5\External\shamp\shamp_groc_1322_1373)
%read_groc(out=work.shamp6,  file=J:\IRI Data\Academic Dataset External\Year6\External\shamp\shamp_groc_1374_1426)
%read_groc(out=work.shamp7,  file=J:\IRI Data\Academic Dataset External\Year7\External\shamp\shamp_groc_1427_1478)
%read_groc(out=work.shamp8,  file=J:\IRI Data\Academic Dataset External\Year8\shamp\shamp_groc_1479_1530)
%read_groc(out=work.shamp9,  file=J:\IRI Data\Academic Dataset External\Year9\External\shamp\shamp_groc_1531_1582)
%read_groc(out=work.shamp10, file=J:\IRI Data\Academic Dataset External\Year10\External\shamp\shamp_groc_1583_1634)
%read_groc(out=work.shamp11, file=J:\IRI Data\Academic Dataset External\Year11\External\shamp\shamp_groc_1635_1686)

* Stack the yearly shamp tables and collapse SY, GE, VEND and ITEM into
* the single numeric key colupc;
data work.shamp;
	set work.shamp1-work.shamp11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
	drop SY GE VEND ITEM;
run;
proc datasets library = work;
	delete shamp1-shamp11;
run;

* Join the sales to perm.dates by week and keep weeks found in both tables;
proc sort data = work.shamp;
	by week;
run;
data work.shamp_dates;
	merge work.shamp (in = has_sale) perm.dates (in = has_week);
	by week;
	if has_sale and has_week;
	drop week_start week_end month qtr;
run;
*****PROD CHAR*********;
* Load the three shamp product workbooks. Each one is imported from Sheet1,
* reduced to product, colupc and vol_eq, and ordered by colupc;
%macro load_prod(out=, file=, keep=product colupc vol_eq);
	proc import out = &out
		datafile = "&file"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	/* Rows with a blank L2 are dropped, L5 becomes product */
	data &out (keep = &keep);
		set &out;
		if L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		/* NOTE(review): input() presumes the four UPC pieces are character - confirm */
		SY = input(SY, 2.0);
		GE = input(GE, 4.0);
		VEND = input(VEND, 6.0);
		ITEM = input(ITEM, 6.0);
		format colupc best18.;
		COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
	run;
	proc sort data = &out;
		by colupc;
	run;
%mend load_prod;

%load_prod(out=work.prod6,  file=J:\IRI Data\Academic Dataset External\parsed stub files\prod_shamp.xls)
%load_prod(out=work.prod7,  file=J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_shamp.xlsx)
%load_prod(out=work.prod11, file=J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_shamp.xlsx)

* Route each dated sale to one of three panels by year - below 7,
* equal to 7, above 7 - so each can be matched to its own product table;
data work.panel6 work.panel7 work.panel11;
	set work.shamp_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		when (year > 7) output work.panel11;
		otherwise;
	end;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
* Keep a sale only when its colupc is also present in the product table;
data work.panel_prod6;
	merge work.panel6 (in = has_sale) work.prod6 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
data work.panel_prod7;
	merge work.panel7 (in = has_sale) work.prod7 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
data work.panel_prod11;
	merge work.panel11 (in = has_sale) work.prod11 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
* Stack the matched panels;
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
* Volume sales - vol_eq times units, totalled per iri_key and week;
proc sql;
	create table work.shamp_vol_sales as
		select p.iri_key, p.week, sum(p.vol_eq * p.units) as shamp_vol_sales
		from work.panel_prod as p
		group by p.iri_key, p.week
		order by p.iri_key, p.week;
quit;
* Remove the per-period intermediates. work.panel_prod itself stays;
proc datasets library = work;
	delete prod6 prod7 prod11
	       panel6 panel7 panel11
	       panel_prod6 panel_prod7 panel_prod11;
run;
* Revenue - dollars totalled per iri_key and week over every row of work.shamp;
proc sql;
	create table work.shamp_revenue as
		select s.iri_key, s.week, sum(s.dollars) as shamp
		from work.shamp as s
		group by s.iri_key, s.week
		order by s.iri_key, s.week;
quit;
*Quarterly UPC count by price quartile;
* Steps - deflate each panel price by CPI, average it per UPC, cut the UPC
* averages at their 25th, 50th and 75th percentiles, then count distinct
* UPCs per iri_key, qtrid and quartile;
proc sort data = work.panel_prod;
	by monthid;
run;
* Only panel rows are kept. Without the in= filter every CPI month that has
* no sales would add an observation whose colupc is missing.
* NOTE(review) - the merge presumes work.cpi is already ordered by monthid. Confirm;
data work.shamp_panel_cpi;
	merge work.panel_prod (in = has_sale) work.cpi;
	by monthid;
	if has_sale;
run;
proc sql;
	create table work.shamp_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.shamp_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.shamp_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
proc sort data = work.shamp_dates;
	by colupc;
run;
* The one-row percentile table is read once and its values are retained.
* A missing avg_price compares lower than any number in SAS, so it has to
* be screened out before the comparisons or it would land in quartile 1.
* UPCs without a usable average price are left out of the counts;
data work.shamp_dates_price;
	if _n_ = 1 then set work.pctl;
	merge work.shamp_dates (in = has_sale) work.shamp_upc_price (in = has_price);
	by colupc;
	if has_sale and has_price;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.shamp_count as
		select iri_key, qtrid, q, count(distinct colupc) as shamp_count
		from work.shamp_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
* One row per iri_key and qtrid, one column per quartile value, named
* shamp_count_ followed by the quartile number;
proc transpose data = work.shamp_count out = work.shamp_count prefix = shamp_count_;
	by iri_key qtrid;
	id q;
	var shamp_count;
run;
* Every missing numeric value becomes 0. The array spans all numeric
* columns, so iri_key and qtrid are covered as well as the counts;
data work.shamp_count (drop = _NAME_ idx);
	set work.shamp_count;
	array counts {*} _numeric_;
	do idx = 1 to dim(counts);
		if counts{idx} = . then counts{idx} = 0;
	end;
run;

proc datasets library = work;
	delete shamp shamp_dates shamp_dates_price pctl shamp_panel_cpi;
run;
quit;

*soup;

* Read the eleven soup scanner files, one per YearN folder. Every file is
* read with list input and its first line is skipped.
* NOTE(review) - only the Year8 path has no External folder. Confirm against disk;
%macro read_groc(out=, file=);
	data &out;
		infile "&file" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
	run;
%mend read_groc;

%read_groc(out=work.soup1,  file=J:\IRI Data\Academic Dataset External\Year1\External\soup\soup_groc_1114_1165)
%read_groc(out=work.soup2,  file=J:\IRI Data\Academic Dataset External\Year2\External\soup\soup_groc_1166_1217)
%read_groc(out=work.soup3,  file=J:\IRI Data\Academic Dataset External\Year3\External\soup\soup_groc_1218_1269)
%read_groc(out=work.soup4,  file=J:\IRI Data\Academic Dataset External\Year4\External\soup\soup_groc_1270_1321)
%read_groc(out=work.soup5,  file=J:\IRI Data\Academic Dataset External\Year5\External\soup\soup_groc_1322_1373)
%read_groc(out=work.soup6,  file=J:\IRI Data\Academic Dataset External\Year6\External\soup\soup_groc_1374_1426)
%read_groc(out=work.soup7,  file=J:\IRI Data\Academic Dataset External\Year7\External\soup\soup_groc_1427_1478)
%read_groc(out=work.soup8,  file=J:\IRI Data\Academic Dataset External\Year8\soup\soup_groc_1479_1530)
%read_groc(out=work.soup9,  file=J:\IRI Data\Academic Dataset External\Year9\External\soup\soup_groc_1531_1582)
%read_groc(out=work.soup10, file=J:\IRI Data\Academic Dataset External\Year10\External\soup\soup_groc_1583_1634)
%read_groc(out=work.soup11, file=J:\IRI Data\Academic Dataset External\Year11\External\soup\soup_groc_1635_1686)

* Stack the yearly soup tables and collapse SY, GE, VEND and ITEM into
* the single numeric key colupc;
data work.soup;
	set work.soup1-work.soup11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
	drop SY GE VEND ITEM;
run;
proc datasets library = work;
	delete soup1-soup11;
run;

* Join the sales to perm.dates by week and keep weeks found in both tables;
proc sort data = work.soup;
	by week;
run;
data work.soup_dates;
	merge work.soup (in = has_sale) perm.dates (in = has_week);
	by week;
	if has_sale and has_week;
	drop week_start week_end month qtr;
run;
run;
* An early copy of work.soup_revenue and a per iri_key, colupc and week
* version of work.soup_upc_price used to be built at this point. Both
* tables are created again further down, under the Revenue and Quarterly
* UPC count headings, before any step reads them, so the early copies
* only cost run time on the full soup data and have been removed;
*****PROD CHAR*********;
* Import Sheet1 of prod_soup.xls into work.prod6, then reduce it to product,
* colupc and a numeric vol_eq. The PROC IMPORT has no RUN statement of its
* own and is ended by the DATA statement that follows it;
PROC IMPORT OUT= WORK.prod6
	DATAFILE= "J:\IRI Data\Academic Dataset External\parsed stub files\prod_soup.xls"
    DBMS=EXCEL REPLACE; SHEET="Sheet1$"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES;
data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	* rows with a blank L2 are dropped;
	IF L2 = " " then delete;
	PRODUCT = L5;
	* PARENT and VENDOR are assigned but not listed in keep=, so they are not written out;
	PARENT = L3;
	VENDOR = L4;
	* NOTE(review) - input presumes SY, GE, VEND and ITEM were imported as text. Confirm;
	SY = input(SY,2.0) ; GE = input(GE,4.0) ; VEND = input(VEND,6.0) ;
    ITEM = input(ITEM,6.0);
	format colupc best18.;
	COLUPC = SY*100000000000+GE*10000000000+VEND*100000+ITEM;
	* only rows whose key could be computed are kept;
	if colupc ne .;
	* adding 0 produces a numeric copy of vol_eq. Presumably vol_eq arrives
	* as text in this workbook - confirm;
	vol_eq2 = vol_eq + 0;
	* the numeric copy takes over the vol_eq name. The drop and rename
	* statements are applied before the keep= option on the output table,
	* so keep= sees the renamed numeric column;
	drop vol_eq;
	rename vol_eq2 = vol_eq;
run;

* Load the two later soup product workbooks. Each one is imported from
* Sheet1, reduced to product, colupc and vol_eq, and ordered by colupc;
%macro load_prod(out=, file=, keep=product colupc vol_eq);
	proc import out = &out
		datafile = "&file"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	/* Rows with a blank L2 are dropped, L5 becomes product */
	data &out (keep = &keep);
		set &out;
		if L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		/* NOTE(review): input() presumes the four UPC pieces are character - confirm */
		SY = input(SY, 2.0);
		GE = input(GE, 4.0);
		VEND = input(VEND, 6.0);
		ITEM = input(ITEM, 6.0);
		format colupc best18.;
		COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
	run;
	proc sort data = &out;
		by colupc;
	run;
%mend load_prod;

%load_prod(out=work.prod7,  file=J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_soup.xlsx)
%load_prod(out=work.prod11, file=J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_soup.xlsx)

* work.prod6 already exists at this point and only needs ordering;
proc sort data = work.prod6;
	by colupc;
run;

* Route each dated sale to one of three panels by year - below 7,
* equal to 7, above 7 - so each can be matched to its own product table;
data work.panel6 work.panel7 work.panel11;
	set work.soup_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		when (year > 7) output work.panel11;
		otherwise;
	end;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
* Keep a sale only when its colupc is also present in the product table;
data work.panel_prod6;
	merge work.panel6 (in = has_sale) work.prod6 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
data work.panel_prod7;
	merge work.panel7 (in = has_sale) work.prod7 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
data work.panel_prod11;
	merge work.panel11 (in = has_sale) work.prod11 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
* Stack the matched panels and keep a category-named copy;
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
data work.soup_panel;
	set work.panel_prod;
run;
* Volume sales - vol_eq times units, totalled per iri_key and week;
proc sql;
	create table work.soup_vol_sales as
		select p.iri_key, p.week, sum(p.vol_eq * p.units) as soup_vol_sales
		from work.panel_prod as p
		group by p.iri_key, p.week
		order by p.iri_key, p.week;
quit;
* Remove the per-period intermediates. work.panel_prod itself stays;
proc datasets library = work;
	delete prod6 prod7 prod11
	       panel6 panel7 panel11
	       panel_prod6 panel_prod7 panel_prod11;
run;
* Revenue - dollars totalled per iri_key and week over every row of work.soup;
proc sql;
	create table work.soup_revenue as
		select s.iri_key, s.week, sum(s.dollars) as soup
		from work.soup as s
		group by s.iri_key, s.week
		order by s.iri_key, s.week;
quit;
*Quarterly UPC count by price quartile;
* Steps - deflate each panel price by CPI, average it per UPC, cut the UPC
* averages at their 25th, 50th and 75th percentiles, then count distinct
* UPCs per iri_key, qtrid and quartile;
proc sort data = work.panel_prod;
	by monthid;
run;
* Only panel rows are kept. Without the in= filter every CPI month that has
* no sales would add an observation whose colupc is missing.
* NOTE(review) - the merge presumes work.cpi is already ordered by monthid. Confirm;
data work.soup_panel_cpi;
	merge work.panel_prod (in = has_sale) work.cpi;
	by monthid;
	if has_sale;
run;
proc sql;
	create table work.soup_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.soup_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.soup_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
proc sort data = work.soup_dates;
	by colupc;
run;
* The one-row percentile table is read once and its values are retained.
* A missing avg_price compares lower than any number in SAS, so it has to
* be screened out before the comparisons or it would land in quartile 1.
* UPCs without a usable average price are left out of the counts;
data work.soup_dates_price;
	if _n_ = 1 then set work.pctl;
	merge work.soup_dates (in = has_sale) work.soup_upc_price (in = has_price);
	by colupc;
	if has_sale and has_price;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.soup_count as
		select iri_key, qtrid, q, count(distinct colupc) as soup_count
		from work.soup_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
* One row per iri_key and qtrid, one column per quartile value, named
* soup_count_ followed by the quartile number;
proc transpose data = work.soup_count out = work.soup_count prefix = soup_count_;
	by iri_key qtrid;
	id q;
	var soup_count;
run;
* Every missing numeric value becomes 0. The array spans all numeric
* columns, so iri_key and qtrid are covered as well as the counts;
data work.soup_count (drop = _NAME_ idx);
	set work.soup_count;
	array counts {*} _numeric_;
	do idx = 1 to dim(counts);
		if counts{idx} = . then counts{idx} = 0;
	end;
run;

proc datasets library = work;
	delete soup soup_dates soup_dates_price pctl soup_panel_cpi;
run;
quit;

*spagsauc;

* Read the eleven spagsauc scanner files, one per YearN folder. Every file
* is read with list input and its first line is skipped.
* NOTE(review) - only the Year8 path has no External folder. Confirm against disk;
%macro read_groc(out=, file=);
	data &out;
		infile "&file" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
	run;
%mend read_groc;

%read_groc(out=work.spagsauc1,  file=J:\IRI Data\Academic Dataset External\Year1\External\spagsauc\spagsauc_groc_1114_1165)
%read_groc(out=work.spagsauc2,  file=J:\IRI Data\Academic Dataset External\Year2\External\spagsauc\spagsauc_groc_1166_1217)
%read_groc(out=work.spagsauc3,  file=J:\IRI Data\Academic Dataset External\Year3\External\spagsauc\spagsauc_groc_1218_1269)
%read_groc(out=work.spagsauc4,  file=J:\IRI Data\Academic Dataset External\Year4\External\spagsauc\spagsauc_groc_1270_1321)
%read_groc(out=work.spagsauc5,  file=J:\IRI Data\Academic Dataset External\Year5\External\spagsauc\spagsauc_groc_1322_1373)
%read_groc(out=work.spagsauc6,  file=J:\IRI Data\Academic Dataset External\Year6\External\spagsauc\spagsauc_groc_1374_1426)
%read_groc(out=work.spagsauc7,  file=J:\IRI Data\Academic Dataset External\Year7\External\spagsauc\spagsauc_groc_1427_1478)
%read_groc(out=work.spagsauc8,  file=J:\IRI Data\Academic Dataset External\Year8\spagsauc\spagsauc_groc_1479_1530)
%read_groc(out=work.spagsauc9,  file=J:\IRI Data\Academic Dataset External\Year9\External\spagsauc\spagsauc_groc_1531_1582)
%read_groc(out=work.spagsauc10, file=J:\IRI Data\Academic Dataset External\Year10\External\spagsauc\spagsauc_groc_1583_1634)
%read_groc(out=work.spagsauc11, file=J:\IRI Data\Academic Dataset External\Year11\External\spagsauc\spagsauc_groc_1635_1686)

* Stack the yearly spagsauc tables and collapse SY, GE, VEND and ITEM into
* the single numeric key colupc;
data work.spagsauc;
	set work.spagsauc1-work.spagsauc11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
	drop SY GE VEND ITEM;
run;
proc datasets library = work;
	delete spagsauc1-spagsauc11;
run;

* Join the sales to perm.dates by week and keep weeks found in both tables;
proc sort data = work.spagsauc;
	by week;
run;
data work.spagsauc_dates;
	merge work.spagsauc (in = has_sale) perm.dates (in = has_week);
	by week;
	if has_sale and has_week;
	drop week_start week_end month qtr;
run;
*****PROD CHAR*********;
* Load the three spagsauc product workbooks. Each one is imported from
* Sheet1, reduced to product, colupc, vol_eq and product_type, and ordered
* by colupc;
%macro load_prod(out=, file=, keep=product colupc vol_eq);
	proc import out = &out
		datafile = "&file"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	/* Rows with a blank L2 are dropped, L5 becomes product */
	data &out (keep = &keep);
		set &out;
		if L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		/* NOTE(review): input() presumes the four UPC pieces are character - confirm */
		SY = input(SY, 2.0);
		GE = input(GE, 4.0);
		VEND = input(VEND, 6.0);
		ITEM = input(ITEM, 6.0);
		format colupc best18.;
		COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
	run;
	proc sort data = &out;
		by colupc;
	run;
%mend load_prod;

%load_prod(out=work.prod6,  keep=product colupc vol_eq product_type,
           file=J:\IRI Data\Academic Dataset External\parsed stub files\prod_sauce.xls)
%load_prod(out=work.prod7,  keep=product colupc vol_eq product_type,
           file=J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_spagsauc.xlsx)
%load_prod(out=work.prod11, keep=product colupc vol_eq product_type,
           file=J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_spagsauc.xlsx)

* Route each dated sale to one of three panels by year - below 7,
* equal to 7, above 7 - so each can be matched to its own product table;
data work.panel6 work.panel7 work.panel11;
	set work.spagsauc_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		when (year > 7) output work.panel11;
		otherwise;
	end;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
* Keep a sale only when its colupc is also present in the product table;
data work.panel_prod6;
	merge work.panel6 (in = has_sale) work.prod6 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
data work.panel_prod7;
	merge work.panel7 (in = has_sale) work.prod7 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
data work.panel_prod11;
	merge work.panel11 (in = has_sale) work.prod11 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
* Stack the matched panels and keep a category-named copy;
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
data work.spagsauc_panel;
	set work.panel_prod;
run;
* Volume sales - vol_eq times units, totalled per iri_key and week;
proc sql;
	create table work.spagsauc_vol_sales as
		select p.iri_key, p.week, sum(p.vol_eq * p.units) as spagsauc_vol_sales
		from work.panel_prod as p
		group by p.iri_key, p.week
		order by p.iri_key, p.week;
quit;
* Remove the per-period intermediates. work.panel_prod itself stays;
proc datasets library = work;
	delete prod6 prod7 prod11
	       panel6 panel7 panel11
	       panel_prod6 panel_prod7 panel_prod11;
run;
* Revenue - dollars totalled per iri_key and week over every row of work.spagsauc;
proc sql;
	create table work.spagsauc_revenue as
		select s.iri_key, s.week, sum(s.dollars) as spagsauc
		from work.spagsauc as s
		group by s.iri_key, s.week
		order by s.iri_key, s.week;
quit;
*Quarterly UPC count by price quartile;
* Steps - deflate each panel price by CPI, average it per UPC, cut the UPC
* averages at their 25th, 50th and 75th percentiles, then count distinct
* UPCs per iri_key, qtrid and quartile;
proc sort data = work.panel_prod;
	by monthid;
run;
* Only panel rows are kept. Without the in= filter every CPI month that has
* no sales would add an observation whose colupc is missing.
* NOTE(review) - the merge presumes work.cpi is already ordered by monthid. Confirm;
data work.spagsauc_panel_cpi;
	merge work.panel_prod (in = has_sale) work.cpi;
	by monthid;
	if has_sale;
run;
proc sql;
	create table work.spagsauc_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.spagsauc_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.spagsauc_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
proc sort data = work.spagsauc_dates;
	by colupc;
run;
* The one-row percentile table is read once and its values are retained.
* A missing avg_price compares lower than any number in SAS, so it has to
* be screened out before the comparisons or it would land in quartile 1.
* UPCs without a usable average price are left out of the counts;
data work.spagsauc_dates_price;
	if _n_ = 1 then set work.pctl;
	merge work.spagsauc_dates (in = has_sale) work.spagsauc_upc_price (in = has_price);
	by colupc;
	if has_sale and has_price;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.spagsauc_count as
		select iri_key, qtrid, q, count(distinct colupc) as spagsauc_count
		from work.spagsauc_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
* One row per iri_key and qtrid, one column per quartile value, named
* spagsauc_count_ followed by the quartile number;
proc transpose data = work.spagsauc_count out = work.spagsauc_count prefix = spagsauc_count_;
	by iri_key qtrid;
	id q;
	var spagsauc_count;
run;
* Every missing numeric value becomes 0. The array spans all numeric
* columns, so iri_key and qtrid are covered as well as the counts;
data work.spagsauc_count (drop = _NAME_ idx);
	set work.spagsauc_count;
	array counts {*} _numeric_;
	do idx = 1 to dim(counts);
		if counts{idx} = . then counts{idx} = 0;
	end;
run;

proc datasets library = work;
	delete spagsauc spagsauc_dates spagsauc_dates_price pctl spagsauc_panel_cpi;
run;
quit;

*sugarsub;

* Read the eleven sugarsub scanner files, one per YearN folder. Every file
* is read with list input and its first line is skipped.
* NOTE(review) - only the Year8 path has no External folder. Confirm against disk;
%macro read_groc(out=, file=);
	data &out;
		infile "&file" firstobs=2;
		input IRI_KEY WEEK SY GE VEND ITEM UNITS DOLLARS F $ D PR;
	run;
%mend read_groc;

%read_groc(out=work.sugarsub1,  file=J:\IRI Data\Academic Dataset External\Year1\External\sugarsub\sugarsub_groc_1114_1165)
%read_groc(out=work.sugarsub2,  file=J:\IRI Data\Academic Dataset External\Year2\External\sugarsub\sugarsub_groc_1166_1217)
%read_groc(out=work.sugarsub3,  file=J:\IRI Data\Academic Dataset External\Year3\External\sugarsub\sugarsub_groc_1218_1269)
%read_groc(out=work.sugarsub4,  file=J:\IRI Data\Academic Dataset External\Year4\External\sugarsub\sugarsub_groc_1270_1321)
%read_groc(out=work.sugarsub5,  file=J:\IRI Data\Academic Dataset External\Year5\External\sugarsub\sugarsub_groc_1322_1373)
%read_groc(out=work.sugarsub6,  file=J:\IRI Data\Academic Dataset External\Year6\External\sugarsub\sugarsub_groc_1374_1426)
%read_groc(out=work.sugarsub7,  file=J:\IRI Data\Academic Dataset External\Year7\External\sugarsub\sugarsub_groc_1427_1478)
%read_groc(out=work.sugarsub8,  file=J:\IRI Data\Academic Dataset External\Year8\sugarsub\sugarsub_groc_1479_1530)
%read_groc(out=work.sugarsub9,  file=J:\IRI Data\Academic Dataset External\Year9\External\sugarsub\sugarsub_groc_1531_1582)
%read_groc(out=work.sugarsub10, file=J:\IRI Data\Academic Dataset External\Year10\External\sugarsub\sugarsub_groc_1583_1634)
%read_groc(out=work.sugarsub11, file=J:\IRI Data\Academic Dataset External\Year11\External\sugarsub\sugarsub_groc_1635_1686)

* Stack the yearly sugarsub tables and collapse SY, GE, VEND and ITEM into
* the single numeric key colupc;
data work.sugarsub;
	set work.sugarsub1-work.sugarsub11;
	format colupc best18.;
	colupc = sy*1e11 + ge*1e10 + vend*1e5 + item;
	drop SY GE VEND ITEM;
run;
proc datasets library = work;
	delete sugarsub1-sugarsub11;
run;

* Join the sales to perm.dates by week and keep weeks found in both tables;
proc sort data = work.sugarsub;
	by week;
run;
data work.sugarsub_dates;
	merge work.sugarsub (in = has_sale) perm.dates (in = has_week);
	by week;
	if has_sale and has_week;
	drop week_start week_end month qtr;
run;
*****PROD CHAR*********;
* Load the three sugarsub product workbooks. Each one is imported from
* Sheet1, reduced to product, colupc and vol_eq, and ordered by colupc;
%macro load_prod(out=, file=, keep=product colupc vol_eq);
	proc import out = &out
		datafile = "&file"
		dbms = excel replace;
		sheet = "Sheet1$";
		getnames = yes;
		mixed = no;
		scantext = yes;
		usedate = yes;
		scantime = yes;
	run;
	/* Rows with a blank L2 are dropped, L5 becomes product */
	data &out (keep = &keep);
		set &out;
		if L2 = " " then delete;
		PRODUCT = L5;
		PARENT = L3;
		VENDOR = L4;
		/* NOTE(review): input() presumes the four UPC pieces are character - confirm */
		SY = input(SY, 2.0);
		GE = input(GE, 4.0);
		VEND = input(VEND, 6.0);
		ITEM = input(ITEM, 6.0);
		format colupc best18.;
		COLUPC = SY*1e11 + GE*1e10 + VEND*1e5 + ITEM;
	run;
	proc sort data = &out;
		by colupc;
	run;
%mend load_prod;

%load_prod(out=work.prod6,  file=J:\IRI Data\Academic Dataset External\parsed stub files\prod_sugar.xls)
%load_prod(out=work.prod7,  file=J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_sugarsub.xlsx)
%load_prod(out=work.prod11, file=J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_sugarsub.xlsx)

* Route each dated sale to one of three panels by year - below 7,
* equal to 7, above 7 - so each can be matched to its own product table;
data work.panel6 work.panel7 work.panel11;
	set work.sugarsub_dates;
	select;
		when (year < 7) output work.panel6;
		when (year = 7) output work.panel7;
		when (year > 7) output work.panel11;
		otherwise;
	end;
run;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;
* Keep a sale only when its colupc is also present in the product table;
data work.panel_prod6;
	merge work.panel6 (in = has_sale) work.prod6 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
data work.panel_prod7;
	merge work.panel7 (in = has_sale) work.prod7 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
data work.panel_prod11;
	merge work.panel11 (in = has_sale) work.prod11 (in = has_char);
	by colupc;
	if not (has_sale and has_char) then delete;
run;
* Stack the matched panels and keep a category-named copy;
data work.panel_prod;
	set work.panel_prod6 work.panel_prod7 work.panel_prod11;
run;
data work.sugarsub_panel;
	set work.panel_prod;
run;
* Volume sales - vol_eq times units, totalled per iri_key and week;
proc sql;
	create table work.sugarsub_vol_sales as
		select p.iri_key, p.week, sum(p.vol_eq * p.units) as sugarsub_vol_sales
		from work.panel_prod as p
		group by p.iri_key, p.week
		order by p.iri_key, p.week;
quit;
* Remove the per-period intermediates. work.panel_prod itself stays;
proc datasets library = work;
	delete prod6 prod7 prod11
	       panel6 panel7 panel11
	       panel_prod6 panel_prod7 panel_prod11;
run;
* Revenue - dollars totalled per iri_key and week over every row of work.sugarsub;
proc sql;
	create table work.sugarsub_revenue as
		select s.iri_key, s.week, sum(s.dollars) as sugarsub
		from work.sugarsub as s
		group by s.iri_key, s.week
		order by s.iri_key, s.week;
quit;
*Quarterly UPC count by price quartile;
* Steps - deflate each panel price by CPI, average it per UPC, cut the UPC
* averages at their 25th, 50th and 75th percentiles, then count distinct
* UPCs per iri_key, qtrid and quartile;
proc sort data = work.panel_prod;
	by monthid;
run;
* Only panel rows are kept. Without the in= filter every CPI month that has
* no sales would add an observation whose colupc is missing.
* NOTE(review) - the merge presumes work.cpi is already ordered by monthid. Confirm;
data work.sugarsub_panel_cpi;
	merge work.panel_prod (in = has_sale) work.cpi;
	by monthid;
	if has_sale;
run;
proc sql;
	create table work.sugarsub_upc_price as
		select colupc,
		       mean((dollars / cpi * 100) / (units * vol_eq)) as avg_price
		from work.sugarsub_panel_cpi
		group by colupc
		order by colupc;
quit;
proc univariate data = work.sugarsub_upc_price noprint;
	var avg_price;
	output out = work.pctl p25 = p25 p50 = p50 p75 = p75;
run;
proc sort data = work.sugarsub_dates;
	by colupc;
run;
* The one-row percentile table is read once and its values are retained.
* A missing avg_price compares lower than any number in SAS, so it has to
* be screened out before the comparisons or it would land in quartile 1.
* UPCs without a usable average price are left out of the counts;
data work.sugarsub_dates_price;
	if _n_ = 1 then set work.pctl;
	merge work.sugarsub_dates (in = has_sale) work.sugarsub_upc_price (in = has_price);
	by colupc;
	if has_sale and has_price;
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
proc sql;
	create table work.sugarsub_count as
		select iri_key, qtrid, q, count(distinct colupc) as sugarsub_count
		from work.sugarsub_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
* One row per iri_key and qtrid, one column per quartile value, named
* sugarsub_count_ followed by the quartile number;
proc transpose data = work.sugarsub_count out = work.sugarsub_count prefix = sugarsub_count_;
	by iri_key qtrid;
	id q;
	var sugarsub_count;
run;
* Every missing numeric value becomes 0. The array spans all numeric
* columns, so iri_key and qtrid are covered as well as the counts;
data work.sugarsub_count (drop = _NAME_ idx);
	set work.sugarsub_count;
	array counts {*} _numeric_;
	do idx = 1 to dim(counts);
		if counts{idx} = . then counts{idx} = 0;
	end;
run;

proc datasets library = work;
	delete sugarsub sugarsub_dates sugarsub_dates_price pctl sugarsub_panel_cpi;
run;
quit;

*toitisu;

/* Read the eleven yearly toitisu files. The first line of each file is skipped.
   NOTE(review): the Year8 path has no External folder level, unlike the other
   years - confirm that this matches the directory layout. */
data work.toitisu1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\toitisu\toitisu_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\toitisu\toitisu_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\toitisu\toitisu_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\toitisu\toitisu_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\toitisu\toitisu_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\toitisu\toitisu_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\toitisu\toitisu_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu8;
	infile "J:\IRI Data\Academic Dataset External\Year8\toitisu\toitisu_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\toitisu\toitisu_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\toitisu\toitisu_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toitisu11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\toitisu\toitisu_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

* Stack the yearly files and build one numeric UPC key, colupc, from SY, GE, VEND and ITEM;
data work.toitisu (drop = SY GE VEND ITEM);
	set work.toitisu1-work.toitisu11;
	format colupc best18.;
	colupc = sy*100000000000 + ge*10000000000 + vend*100000 + item;
run;

* The yearly pieces are no longer needed;
proc datasets library = work;
	delete toitisu1-toitisu11;
run;
quit;

proc sort data = work.toitisu;
	by week;
run;

* Add the fields of perm.dates by week, keeping only weeks present in both inputs;
data work.toitisu_dates (drop = week_start week_end month qtr);
	merge work.toitisu (in = in_sales)
	      perm.dates   (in = in_cal);
	by week;
	if in_sales ne in_cal then delete;
run;
*****PROD CHAR*********;
/* Import the three product attribute workbooks (prod6, prod7, prod11) and reduce
   each to product, colupc and vol_eq. Rows with a blank L2 are discarded.
   PARENT and VENDOR are assigned but not kept by the KEEP= option. */
proc import out = work.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_toilet.xls"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	* Same key formula as the one applied to the sales records;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

proc import out = work.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_toitisu.xlsx"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

proc import out = work.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_toitisu.xlsx"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

* Order each product table by the merge key;
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

* Route each record to one of three panels by year - below 7, equal to 7, above 7;
data work.panel6 work.panel7 work.panel11;
	set work.toitisu_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else if year > 7 then output work.panel11;
run;

* Order each panel by the merge key;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

* Join each panel to its product table and keep only UPCs found in both;
data work.panel_prod6;
	merge work.panel6 (in = in_panel)
	      work.prod6  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

data work.panel_prod7;
	merge work.panel7 (in = in_panel)
	      work.prod7  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

data work.panel_prod11;
	merge work.panel11 (in = in_panel)
	      work.prod11  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

* Stack the three joined pieces into a single panel;
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
* Unit sales - vol_eq times units, summed for each iri_key and week;
proc sql;
	create table work.toitisu_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as toitisu_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

* Remove the period-specific intermediates now that panel_prod is built;
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

* Revenue - dollars summed for each iri_key and week over all toitisu records;
proc sql;
	create table work.toitisu_revenue as
		select iri_key,
		       week,
		       sum(dollars) as toitisu
		from work.toitisu
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
* Attach CPI by monthid. Only panel rows are kept so that CPI months without
  any panel record do not add rows with a missing colupc;
proc sort data = work.panel_prod;
	by monthid;
run;
data work.toitisu_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;
* Mean CPI-deflated price per unit of volume for each UPC. Rows whose price
  cannot be computed are ignored by MEAN;
proc sql;
create table work.toitisu_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.toitisu_panel_cpi
group by colupc
order by colupc;
quit;
* 25th, 50th and 75th percentiles of the UPC-level average price;
proc univariate data=toitisu_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
* Constant key n lets the single row of cut points merge onto every record;
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.toitisu_dates;
	by colupc;
run;
* Keep dated records whose UPC has an average price;
data work.toitisu_dates_price;
	merge work.toitisu_dates (in=in1) work.toitisu_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
* Assign each record to a price quartile q;
data work.toitisu_dates_price;
	merge work.toitisu_dates_price work.pctl;
	by n;
	/* SAS orders a missing value below every number, so an unguarded
	   avg_price < p25 test would place UPCs with no computable price in
	   quartile 1. Those records are dropped instead of being misclassified. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
* Distinct UPCs for each iri_key, qtrid and quartile;
proc sql;
create table work.toitisu_count as
select iri_key, qtrid, q, count(distinct colupc) as toitisu_count
from work.toitisu_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
* Reshape to one row per iri_key and qtrid with one count column per quartile;
proc transpose data=work.toitisu_count out=work.toitisu_count prefix=toitisu_count_;
	by iri_key qtrid;
	id q;
	var toitisu_count;
run;
* Quartiles with no UPC for a given iri_key and qtrid come out missing - set them to zero;
data work.toitisu_count (drop = _NAME_);
   set work.toitisu_count;
   array change _numeric_;
        do over change;
            if change=. then change=0;
        end;
 run;

* Drop the category intermediates;
proc datasets library = work;
	delete toitisu toitisu_dates toitisu_dates_price pctl toitisu_panel_cpi;
run;
quit;

*toothbr;

/* Read the eleven yearly toothbr files. The first line of each file is skipped.
   NOTE(review): the Year8 path has no External folder level, unlike the other
   years - confirm that this matches the directory layout. */
data work.toothbr1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\toothbr\toothbr_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\toothbr\toothbr_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\toothbr\toothbr_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\toothbr\toothbr_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\toothbr\toothbr_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\toothbr\toothbr_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\toothbr\toothbr_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr8;
	infile "J:\IRI Data\Academic Dataset External\Year8\toothbr\toothbr_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\toothbr\toothbr_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\toothbr\toothbr_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothbr11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\toothbr\toothbr_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

* Stack the yearly files and build one numeric UPC key, colupc, from SY, GE, VEND and ITEM;
data work.toothbr (drop = SY GE VEND ITEM);
	set work.toothbr1-work.toothbr11;
	format colupc best18.;
	colupc = sy*100000000000 + ge*10000000000 + vend*100000 + item;
run;

* The yearly pieces are no longer needed;
proc datasets library = work;
	delete toothbr1-toothbr11;
run;
quit;

proc sort data = work.toothbr;
	by week;
run;

* Add the fields of perm.dates by week, keeping only weeks present in both inputs;
data work.toothbr_dates (drop = week_start week_end month qtr);
	merge work.toothbr (in = in_sales)
	      perm.dates   (in = in_cal);
	by week;
	if in_sales ne in_cal then delete;
run;
*****PROD CHAR*********;
/* Import the three product attribute workbooks (prod6, prod7, prod11) and reduce
   each to product, colupc and vol_eq. Rows with a blank L2 are discarded.
   PARENT and VENDOR are assigned but not kept by the KEEP= option. */
proc import out = work.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_tooth.xls"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	* Same key formula as the one applied to the sales records;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

proc import out = work.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_tbrush.xlsx"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

proc import out = work.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_toothbr.xlsx"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

* Order each product table by the merge key;
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

* Route each record to one of three panels by year - below 7, equal to 7, above 7;
data work.panel6 work.panel7 work.panel11;
	set work.toothbr_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else if year > 7 then output work.panel11;
run;

* Order each panel by the merge key;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

* Join each panel to its product table and keep only UPCs found in both;
data work.panel_prod6;
	merge work.panel6 (in = in_panel)
	      work.prod6  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

data work.panel_prod7;
	merge work.panel7 (in = in_panel)
	      work.prod7  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

data work.panel_prod11;
	merge work.panel11 (in = in_panel)
	      work.prod11  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

* Stack the three joined pieces into a single panel;
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
* Unit sales - vol_eq times units, summed for each iri_key and week;
proc sql;
	create table work.toothbr_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as toothbr_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

* Remove the period-specific intermediates now that panel_prod is built;
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

* Revenue - dollars summed for each iri_key and week over all toothbr records;
proc sql;
	create table work.toothbr_revenue as
		select iri_key,
		       week,
		       sum(dollars) as toothbr
		from work.toothbr
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
* Attach CPI by monthid. Only panel rows are kept so that CPI months without
  any panel record do not add rows with a missing colupc;
proc sort data = work.panel_prod;
	by monthid;
run;
data work.toothbr_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;
* Mean CPI-deflated price per unit of volume for each UPC. Rows whose price
  cannot be computed are ignored by MEAN;
proc sql;
create table work.toothbr_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.toothbr_panel_cpi
group by colupc
order by colupc;
quit;
* 25th, 50th and 75th percentiles of the UPC-level average price;
proc univariate data=toothbr_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
* Constant key n lets the single row of cut points merge onto every record;
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.toothbr_dates;
	by colupc;
run;
* Keep dated records whose UPC has an average price;
data work.toothbr_dates_price;
	merge work.toothbr_dates (in=in1) work.toothbr_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
* Assign each record to a price quartile q;
data work.toothbr_dates_price;
	merge work.toothbr_dates_price work.pctl;
	by n;
	/* SAS orders a missing value below every number, so an unguarded
	   avg_price < p25 test would place UPCs with no computable price in
	   quartile 1. Those records are dropped instead of being misclassified. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
* Distinct UPCs for each iri_key, qtrid and quartile;
proc sql;
create table work.toothbr_count as
select iri_key, qtrid, q, count(distinct colupc) as toothbr_count
from work.toothbr_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
* Reshape to one row per iri_key and qtrid with one count column per quartile;
proc transpose data=work.toothbr_count out=work.toothbr_count prefix=toothbr_count_;
	by iri_key qtrid;
	id q;
	var toothbr_count;
run;
* Quartiles with no UPC for a given iri_key and qtrid come out missing - set them to zero;
data work.toothbr_count (drop = _NAME_);
   set work.toothbr_count;
   array change _numeric_;
        do over change;
            if change=. then change=0;
        end;
 run;

* Drop the category intermediates;
proc datasets library = work;
	delete toothbr toothbr_dates toothbr_dates_price pctl toothbr_panel_cpi;
run;
quit;

*toothpa;

/* Read the eleven yearly toothpa files. The first line of each file is skipped.
   NOTE(review): the Year8 path has no External folder level, unlike the other
   years - confirm that this matches the directory layout. */
data work.toothpa1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\toothpa\toothpa_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\toothpa\toothpa_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\toothpa\toothpa_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\toothpa\toothpa_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\toothpa\toothpa_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\toothpa\toothpa_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\toothpa\toothpa_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa8;
	infile "J:\IRI Data\Academic Dataset External\Year8\toothpa\toothpa_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\toothpa\toothpa_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\toothpa\toothpa_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.toothpa11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\toothpa\toothpa_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

* Stack the yearly files and build one numeric UPC key, colupc, from SY, GE, VEND and ITEM;
data work.toothpa (drop = SY GE VEND ITEM);
	set work.toothpa1-work.toothpa11;
	format colupc best18.;
	colupc = sy*100000000000 + ge*10000000000 + vend*100000 + item;
run;

* The yearly pieces are no longer needed;
proc datasets library = work;
	delete toothpa1-toothpa11;
run;
quit;

proc sort data = work.toothpa;
	by week;
run;

* Add the fields of perm.dates by week, keeping only weeks present in both inputs;
data work.toothpa_dates (drop = week_start week_end month qtr);
	merge work.toothpa (in = in_sales)
	      perm.dates   (in = in_cal);
	by week;
	if in_sales ne in_cal then delete;
run;
*****PROD CHAR*********;
/* Import the three product attribute workbooks (prod6, prod7, prod11) and reduce
   each to product, colupc and vol_eq. Rows with a blank L2 are discarded.
   PARENT and VENDOR are assigned but not kept by the KEEP= option. */
proc import out = work.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_paste.xls"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	* Same key formula as the one applied to the sales records;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

proc import out = work.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_tpaste.xlsx"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

proc import out = work.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_toothpa.xlsx"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

* Order each product table by the merge key;
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

* Route each record to one of three panels by year - below 7, equal to 7, above 7;
data work.panel6 work.panel7 work.panel11;
	set work.toothpa_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else if year > 7 then output work.panel11;
run;

* Order each panel by the merge key;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

* Join each panel to its product table and keep only UPCs found in both;
data work.panel_prod6;
	merge work.panel6 (in = in_panel)
	      work.prod6  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

data work.panel_prod7;
	merge work.panel7 (in = in_panel)
	      work.prod7  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

data work.panel_prod11;
	merge work.panel11 (in = in_panel)
	      work.prod11  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

* Stack the three joined pieces into a single panel;
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;
* Unit sales - vol_eq times units, summed for each iri_key and week;
proc sql;
	create table work.toothpa_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as toothpa_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

* Remove the period-specific intermediates now that panel_prod is built;
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

* Revenue - dollars summed for each iri_key and week over all toothpa records;
proc sql;
	create table work.toothpa_revenue as
		select iri_key,
		       week,
		       sum(dollars) as toothpa
		from work.toothpa
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
* Attach CPI by monthid. Only panel rows are kept so that CPI months without
  any panel record do not add rows with a missing colupc;
proc sort data = work.panel_prod;
	by monthid;
run;
data work.toothpa_panel_cpi;
	merge work.panel_prod (in = in_panel) work.cpi;
	by monthid;
	if in_panel;
run;
* Mean CPI-deflated price per unit of volume for each UPC. Rows whose price
  cannot be computed are ignored by MEAN;
proc sql;
create table work.toothpa_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.toothpa_panel_cpi
group by colupc
order by colupc;
quit;
* 25th, 50th and 75th percentiles of the UPC-level average price;
proc univariate data=toothpa_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
* Constant key n lets the single row of cut points merge onto every record;
data work.pctl;
	set work.pctl;
	n = 1;
run;
proc sort data = work.toothpa_dates;
	by colupc;
run;
* Keep dated records whose UPC has an average price;
data work.toothpa_dates_price;
	merge work.toothpa_dates (in=in1) work.toothpa_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
* Assign each record to a price quartile q;
data work.toothpa_dates_price;
	merge work.toothpa_dates_price work.pctl;
	by n;
	/* SAS orders a missing value below every number, so an unguarded
	   avg_price < p25 test would place UPCs with no computable price in
	   quartile 1. Those records are dropped instead of being misclassified. */
	if missing(avg_price) then delete;
	if avg_price < p25 then q = 1;
	else if avg_price < p50 then q = 2;
	else if avg_price < p75 then q = 3;
	else q = 4;
run;
* Distinct UPCs for each iri_key, qtrid and quartile;
proc sql;
create table work.toothpa_count as
select iri_key, qtrid, q, count(distinct colupc) as toothpa_count
from work.toothpa_dates_price
group by iri_key, qtrid, q
order by iri_key, qtrid, q;
quit;
* Reshape to one row per iri_key and qtrid with one count column per quartile;
proc transpose data=work.toothpa_count out=work.toothpa_count prefix=toothpa_count_;
	by iri_key qtrid;
	id q;
	var toothpa_count;
run;
* Quartiles with no UPC for a given iri_key and qtrid come out missing - set them to zero;
data work.toothpa_count (drop = _NAME_);
   set work.toothpa_count;
   array change _numeric_;
        do over change;
            if change=. then change=0;
        end;
 run;

* Drop the category intermediates;
proc datasets library = work;
	delete toothpa toothpa_dates toothpa_dates_price pctl toothpa_panel_cpi;
run;
quit;

*yogurt;

/* Read the eleven yearly yogurt files. The first line of each file is skipped.
   NOTE(review): the Year8 path has no External folder level, unlike the other
   years - confirm that this matches the directory layout. */
data work.yogurt1;
	infile "J:\IRI Data\Academic Dataset External\Year1\External\yogurt\yogurt_groc_1114_1165"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt2;
	infile "J:\IRI Data\Academic Dataset External\Year2\External\yogurt\yogurt_groc_1166_1217"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt3;
	infile "J:\IRI Data\Academic Dataset External\Year3\External\yogurt\yogurt_groc_1218_1269"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt4;
	infile "J:\IRI Data\Academic Dataset External\Year4\External\yogurt\yogurt_groc_1270_1321"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt5;
	infile "J:\IRI Data\Academic Dataset External\Year5\External\yogurt\yogurt_groc_1322_1373"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt6;
	infile "J:\IRI Data\Academic Dataset External\Year6\External\yogurt\yogurt_groc_1374_1426"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt7;
	infile "J:\IRI Data\Academic Dataset External\Year7\External\yogurt\yogurt_groc_1427_1478"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt8;
	infile "J:\IRI Data\Academic Dataset External\Year8\yogurt\yogurt_groc_1479_1530"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt9;
	infile "J:\IRI Data\Academic Dataset External\Year9\External\yogurt\yogurt_groc_1531_1582"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt10;
	infile "J:\IRI Data\Academic Dataset External\Year10\External\yogurt\yogurt_groc_1583_1634"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

data work.yogurt11;
	infile "J:\IRI Data\Academic Dataset External\Year11\External\yogurt\yogurt_groc_1635_1686"
		firstobs = 2;
	input IRI_KEY WEEK SY GE VEND ITEM
	      UNITS DOLLARS F $ D PR;
run;

* Stack the yearly files and build one numeric UPC key, colupc, from SY, GE, VEND and ITEM;
data work.yogurt (drop = SY GE VEND ITEM);
	set work.yogurt1-work.yogurt11;
	format colupc best18.;
	colupc = sy*100000000000 + ge*10000000000 + vend*100000 + item;
run;

* The yearly pieces are no longer needed;
proc datasets library = work;
	delete yogurt1-yogurt11;
run;
quit;

proc sort data = work.yogurt;
	by week;
run;

* Add the fields of perm.dates by week, keeping only weeks present in both inputs;
data work.yogurt_dates (drop = week_start week_end month qtr);
	merge work.yogurt (in = in_sales)
	      perm.dates  (in = in_cal);
	by week;
	if in_sales ne in_cal then delete;
run;
*****PROD CHAR*********;
/* Import the three product attribute workbooks (prod6, prod7, prod11) and reduce
   each to product, colupc and vol_eq. Rows with a blank L2 are discarded.
   PARENT and VENDOR are assigned but not kept by the KEEP= option. */
proc import out = work.prod6
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files\prod_yogurt.xls"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod6 (keep = product colupc vol_eq);
	set work.prod6;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	* Same key formula as the one applied to the sales records;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

proc import out = work.prod7
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2007\prod_yogurt.xlsx"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod7 (keep = product colupc vol_eq);
	set work.prod7;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

proc import out = work.prod11
	datafile = "J:\IRI Data\Academic Dataset External\parsed stub files 2008-2011\prod11_yogurt.xlsx"
	dbms = excel replace;
	sheet = "Sheet1$";
	getnames = yes;
	mixed = no;
	scantext = yes;
	usedate = yes;
	scantime = yes;
run;

data work.prod11 (keep = product colupc vol_eq);
	set work.prod11;
	if L2 = " " then delete;
	PRODUCT = L5;
	PARENT = L3;
	VENDOR = L4;
	SY = input(SY, 2.0);
	GE = input(GE, 4.0);
	VEND = input(VEND, 6.0);
	ITEM = input(ITEM, 6.0);
	format colupc best18.;
	COLUPC = SY*100000000000 + GE*10000000000 + VEND*100000 + ITEM;
run;

* Order each product table by the merge key;
proc sort data = work.prod6;
	by colupc;
run;
proc sort data = work.prod7;
	by colupc;
run;
proc sort data = work.prod11;
	by colupc;
run;

* Route each record to one of three panels by year - below 7, equal to 7, above 7;
data work.panel6 work.panel7 work.panel11;
	set work.yogurt_dates;
	if year < 7 then output work.panel6;
	else if year = 7 then output work.panel7;
	else if year > 7 then output work.panel11;
run;

* Order each panel by the merge key;
proc sort data = work.panel6;
	by colupc;
run;
proc sort data = work.panel7;
	by colupc;
run;
proc sort data = work.panel11;
	by colupc;
run;

* Join each panel to its product table and keep only UPCs found in both;
data work.panel_prod6;
	merge work.panel6 (in = in_panel)
	      work.prod6  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

data work.panel_prod7;
	merge work.panel7 (in = in_panel)
	      work.prod7  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

data work.panel_prod11;
	merge work.panel11 (in = in_panel)
	      work.prod11  (in = in_prod);
	by colupc;
	if not (in_panel and in_prod) then delete;
run;

* Stack the three joined pieces into a single panel;
data work.panel_prod;
	set work.panel_prod6
	    work.panel_prod7
	    work.panel_prod11;
run;

* Keep a category-named copy of the joined panel. The shared name panel_prod
  is reused by the steps that follow;
data work.yogurt_panel;
	set work.panel_prod;
run;
* Unit sales - vol_eq times units, summed for each iri_key and week;
proc sql;
	create table work.yogurt_vol_sales as
		select iri_key,
		       week,
		       sum(vol_eq * units) as yogurt_vol_sales
		from work.panel_prod
		group by iri_key, week
		order by iri_key, week;
quit;

* Remove the period-specific intermediates now that panel_prod is built;
proc datasets library = work;
	delete panel_prod6 panel_prod7 panel_prod11
	       prod6 prod7 prod11
	       panel6 panel7 panel11;
run;
quit;

* Revenue - dollars summed for each iri_key and week over all yogurt records;
proc sql;
	create table work.yogurt_revenue as
		select iri_key,
		       week,
		       sum(dollars) as yogurt
		from work.yogurt
		group by iri_key, week
		order by iri_key, week;
quit;
*Quarterly UPC count;
proc sort data = work.panel_prod;
	by monthid;
data work.yogurt_panel_cpi;
	merge work.panel_prod work.cpi;
	by monthid;
run;
proc sql;
create table work.yogurt_upc_price as
select colupc, mean((dollars/cpi * 100) / (units * vol_eq)) as avg_price
from work.yogurt_panel_cpi
group by colupc
order by colupc;
quit;
proc univariate data=yogurt_upc_price noprint;
	var avg_price;
	output out=pctl p25=p25 p50=p50 p75=p75;
run;
data work.pctl;
	set work.pctl;
	n = 1;
run;	
proc sort data = work.yogurt_dates;
	by colupc;
data work.yogurt_dates_price;
	merge work.yogurt_dates (in=in1) work.yogurt_upc_price (in=in2);
	by colupc;
	if in1 and in2;
	n = 1;
run;
data work.yogurt_dates_price;
	merge work.yogurt_dates_price work.pctl;
	by n;
	if avg_price < p25 then q = 1;
	if avg_price >= p25 & avg_price < p50 then q = 2;
	if avg_price >= p50 & avg_price < p75 then q = 3;
	if avg_price >= p75 then q = 4;
run;
/* Number of distinct UPCs per iri_key, qtrid and price quartile q. */
proc sql;
	create table work.yogurt_count as
		select   iri_key,
		         qtrid,
		         q,
		         count(distinct colupc) as yogurt_count
		from     work.yogurt_dates_price
		group by iri_key, qtrid, q
		order by iri_key, qtrid, q;
quit;
/* Reshape to one row per iri_key / qtrid with one column per quartile;
   column names are the prefix yogurt_count_ followed by the value of q. */
proc transpose data=work.yogurt_count
               out=work.yogurt_count
               prefix=yogurt_count_;
	by iri_key qtrid;
	id q;
	var yogurt_count;
run;

/* Replace missing values in every numeric column with 0 and drop the
   _NAME_ column that PROC TRANSPOSE adds. */
data work.yogurt_count (drop = _NAME_);
	set work.yogurt_count;
	array nums{*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

/* Drop the category-level intermediate tables from WORK. */
proc datasets library = work;
	delete yogurt
	       yogurt_dates
	       yogurt_dates_price
	       pctl
	       yogurt_panel_cpi;
run;

**************;

/* Combine the per-category revenue tables into one row per iri_key / week.
   Each input contributes one column named after its category. Missing
   values in any numeric column are set to 0 before the totals are built,
   so a category absent for an iri_key / week counts as zero revenue. */
data work.revenue;
	merge beer_revenue blades_revenue carbbev_revenue cigets_revenue coffee_revenue coldcer_revenue deod_revenue diapers_revenue factiss_revenue fzdinent_revenue
		fzpizza_revenue hhclean_revenue hotdog_revenue laundet_revenue margbutr_revenue mayo_revenue milk_revenue mustketc_revenue paptowl_revenue
		peanbutr_revenue photo_revenue razors_revenue saltsnck_revenue shamp_revenue soup_revenue spagsauc_revenue sugarsub_revenue toitisu_revenue
		toothbr_revenue toothpa_revenue yogurt_revenue;
	by iri_key week;

	/* The array is declared before revenue and foodrev exist, so it covers
	   only the merged columns. */
	array nums{*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;

	/* Total revenue across all 31 categories. */
	revenue = beer+blades+carbbev+cigets+coffee+coldcer+deod+diapers+factiss+fzdinent+
		fzpizza+hhclean+hotdog+laundet+margbutr+mayo+milk+mustketc+paptowl+
		peanbutr+photo+razors+saltsnck+shamp+soup+spagsauc+sugarsub+toitisu+
		toothbr+toothpa+yogurt;

	/* Food revenue. shamp (shampoo) was previously part of this sum and has
	   been removed because it is not a food category.
	   NOTE(review): beer and milk are not included here either; that is
	   left unchanged - confirm whether their exclusion is intended. */
	foodrev = carbbev+coffee+coldcer+fzdinent+
		fzpizza+hotdog+margbutr+mayo+mustketc+
		peanbutr+saltsnck+soup+spagsauc+sugarsub+
		yogurt;
run;

/* Combine the per-category volume tables into one row per iri_key / week
   and set missing values in every numeric column to 0.
   NOTE(review): there is no photo_vol_sales in this list although photo
   appears in the revenue and count merges - confirm this is intended. */
data work.vol_sales;
	merge beer_vol_sales blades_vol_sales carbbev_vol_sales cigets_vol_sales coffee_vol_sales coldcer_vol_sales deod_vol_sales diapers_vol_sales factiss_vol_sales fzdinent_vol_sales
		fzpizza_vol_sales hhclean_vol_sales hotdog_vol_sales laundet_vol_sales margbutr_vol_sales mayo_vol_sales milk_vol_sales mustketc_vol_sales paptowl_vol_sales
		peanbutr_vol_sales razors_vol_sales saltsnck_vol_sales shamp_vol_sales soup_vol_sales spagsauc_vol_sales sugarsub_vol_sales toitisu_vol_sales
		toothbr_vol_sales toothpa_vol_sales yogurt_vol_sales;
	by iri_key week;

	array nums{*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

/* Combine the per-category UPC-count tables into one row per
   iri_key / qtrid. first and last flag the first and last row of each
   iri_key. */
data work.count;
	merge beer_count blades_count carbbev_count cigets_count coffee_count coldcer_count deod_count diapers_count factiss_count fzdinent_count
		fzpizza_count hhclean_count hotdog_count laundet_count margbutr_count mayo_count milk_count mustketc_count paptowl_count
		peanbutr_count photo_count razors_count saltsnck_count shamp_count soup_count spagsauc_count sugarsub_count toitisu_count
		toothbr_count toothpa_count yogurt_count;
	by iri_key qtrid;

	first = first.iri_key;
	last  = last.iri_key;
run;

proc sort data = work.count;
	by iri_key qtrid;
run;

/* Set missing values in every numeric column to 0. */
data work.count;
	set work.count;
	by iri_key qtrid;

	array nums{*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

****** Merge everything (revenue and sales) *****;
/* Join revenue and volume sales on iri_key / week, then set missing
   values in every numeric column to 0. */
data work.merge;
	merge work.revenue
	      work.vol_sales;
	by iri_key week;

	array nums{*} _numeric_;
	do _k = 1 to dim(nums);
		if nums{_k} = . then nums{_k} = 0;
	end;
	drop _k;
run;

/* Attach the calendar columns from iri.dates by week and keep only weeks
   present in both tables. weekid re-bases the week number by subtracting
   1113. week, qtr, week_start and week_end are not written to the output.
   NOTE(review): iri.dates is assumed to be in week order - confirm. */
proc sort data = work.merge;
	by week;
run;

data work.merge_dates (drop = week qtr week_start week_end);
	merge work.merge (in=has_sales)
	      iri.dates  (in=has_date);
	by week;
	if not (has_sales and has_date) then delete;
	weekid = week - 1113;
run;

/* Attach the columns of iri.store_markets by iri_key, keeping only keys
   present in both tables.
   NOTE(review): iri.store_markets is assumed to be in iri_key order - confirm. */
proc sort data = work.merge_dates;
	by iri_key;
run;

data work.merge_dates_markets;
	merge work.merge_dates  (in=has_sales)
	      iri.store_markets (in=has_market);
	by iri_key;
	if not (has_sales and has_market) then delete;
run;

****** Merge everything (count) *****;
/* Attach the calendar columns from iri.dates to the quarterly counts by
   qtrid, keeping only quarters present in both tables. week, week_start,
   week_end, month and monthid are not written to the output.
   NOTE(review): if iri.dates holds several rows per qtrid (for example one
   per week) this is a many-to-many BY-merge: SAS pairs rows positionally
   within each qtrid rather than forming a cross product. That is harmless
   only while the remaining iri.dates columns are constant within a qtrid
   and work.count has at least as many rows per qtrid as iri.dates -
   confirm, or reduce iri.dates to one row per qtrid first. */
proc sort data = work.count;
	by qtrid;
run;

data work.merge_count_dates (drop = week week_start week_end month monthid);
	merge work.count (in=has_count)
	      iri.dates  (in=has_date);
	by qtrid;
	if not (has_count and has_date) then delete;
run;

/* Attach the columns of iri.store_markets to the quarterly counts by
   iri_key, keeping only keys present in both tables.
   NOTE(review): iri.store_markets is assumed to be in iri_key order - confirm. */
proc sort data = work.merge_count_dates;
	by iri_key;
run;

data work.merge_count_dates_markets;
	merge work.merge_count_dates (in=has_count)
	      iri.store_markets      (in=has_market);
	by iri_key;
	if not (has_count and has_market) then delete;
run;

